[clang-tools-extra] r265691 - [clang-tidy] add new checker for string literal with NUL character.

Etienne Bergeron via cfe-commits cfe-commits at lists.llvm.org
Thu Apr 7 09:16:36 PDT 2016


Author: etienneb
Date: Thu Apr  7 11:16:36 2016
New Revision: 265691

URL: http://llvm.org/viewvc/llvm-project?rev=265691&view=rev
Log:
[clang-tidy] add new checker for string literal with NUL character.

Summary:
This patch adds the support for detecting suspicious string
literals and their //incorrect// usage.

The following example shows a incorrect character escaping leading 
to an embedded NUL character. 
```
  std::string str = "\0x42";   // Should be "\x42".
```

The patch also add detection of truncated literal when a literal
is passed to a string constructor.

Reviewers: hokein, alexfh

Subscribers: LegalizeAdulthood, bcraig, Eugene.Zelenko, bkramer, cfe-commits

Differential Revision: http://reviews.llvm.org/D18783

Added:
    clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp
    clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h
    clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst
    clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp
Modified:
    clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt
    clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp
    clang-tools-extra/trunk/docs/ReleaseNotes.rst
    clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst

Modified: clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt?rev=265691&r1=265690&r2=265691&view=diff
==============================================================================
--- clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clang-tidy/misc/CMakeLists.txt Thu Apr  7 11:16:36 2016
@@ -23,6 +23,7 @@ add_clang_library(clangTidyMiscModule
   SizeofContainerCheck.cpp
   StaticAssertCheck.cpp
   StringIntegerAssignmentCheck.cpp
+  StringLiteralWithEmbeddedNulCheck.cpp
   SuspiciousMissingCommaCheck.cpp
   SuspiciousSemicolonCheck.cpp
   SwappedArgumentsCheck.cpp

Modified: clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp?rev=265691&r1=265690&r2=265691&view=diff
==============================================================================
--- clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp (original)
+++ clang-tools-extra/trunk/clang-tidy/misc/MiscTidyModule.cpp Thu Apr  7 11:16:36 2016
@@ -31,6 +31,7 @@
 #include "SizeofContainerCheck.h"
 #include "StaticAssertCheck.h"
 #include "StringIntegerAssignmentCheck.h"
+#include "StringLiteralWithEmbeddedNulCheck.h"
 #include "SuspiciousMissingCommaCheck.h"
 #include "SuspiciousSemicolonCheck.h"
 #include "SwappedArgumentsCheck.h"
@@ -89,6 +90,8 @@ public:
         "misc-static-assert");
     CheckFactories.registerCheck<StringIntegerAssignmentCheck>(
         "misc-string-integer-assignment");
+    CheckFactories.registerCheck<StringLiteralWithEmbeddedNulCheck>(
+        "misc-string-literal-with-embedded-nul");
     CheckFactories.registerCheck<SuspiciousMissingCommaCheck>(
         "misc-suspicious-missing-comma");
     CheckFactories.registerCheck<SuspiciousSemicolonCheck>(

Added: clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp?rev=265691&view=auto
==============================================================================
--- clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp (added)
+++ clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.cpp Thu Apr  7 11:16:36 2016
@@ -0,0 +1,83 @@
+//===--- StringLiteralWithEmbeddedNulCheck.cpp - clang-tidy----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "StringLiteralWithEmbeddedNulCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+AST_MATCHER(StringLiteral, containsNul) {
+  for (size_t i = 0; i < Node.getLength(); ++i)
+    if (Node.getCodeUnit(i) == '\0')
+      return true;
+  return false;
+}
+
+void StringLiteralWithEmbeddedNulCheck::registerMatchers(MatchFinder *Finder) {
+  // Match a string that contains embedded NUL character. Extra-checks are
+  // applied in |check| to find incorectly escaped characters.
+  Finder->addMatcher(stringLiteral(containsNul()).bind("strlit"), this);
+
+  // The remaining checks only apply to C++.
+  if (!getLangOpts().CPlusPlus)
+    return;
+
+  const auto StrLitWithNul =
+      ignoringParenImpCasts(stringLiteral(containsNul()).bind("truncated"));
+
+  // Match string constructor.
+  const auto StringConstructorExpr = expr(anyOf(
+      cxxConstructExpr(argumentCountIs(1),
+                       hasDeclaration(cxxMethodDecl(hasName("basic_string")))),
+      // If present, the second argument is the alloc object which must not
+      // be present explicitly.
+      cxxConstructExpr(argumentCountIs(2),
+                       hasDeclaration(cxxMethodDecl(hasName("basic_string"))),
+                       hasArgument(1, cxxDefaultArgExpr()))));
+
+  // Detect passing a suspicious string literal to a string constructor.
+  // example: std::string str = "abc\0def";
+  Finder->addMatcher(
+      cxxConstructExpr(StringConstructorExpr, hasArgument(0, StrLitWithNul)),
+      this);
+
+  // Detect passing a suspicious string literal through an overloaded operator.
+  Finder->addMatcher(cxxOperatorCallExpr(hasAnyArgument(StrLitWithNul)), this);
+}
+
+void StringLiteralWithEmbeddedNulCheck::check(
+    const MatchFinder::MatchResult &Result) {
+  if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
+    for (size_t Offset = 0, Length = SL->getLength(); Offset < Length;
+         ++Offset) {
+      // Find a sequence of character like "\0x12".
+      if (Offset + 3 < Length && SL->getCodeUnit(Offset) == '\0' &&
+          SL->getCodeUnit(Offset + 1) == 'x' &&
+          isDigit(SL->getCodeUnit(Offset + 2)) &&
+          isDigit(SL->getCodeUnit(Offset + 3))) {
+        diag(SL->getLocStart(), "suspicious embedded NUL character");
+        return;
+      }
+    }
+  }
+
+  if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("truncated")) {
+    diag(SL->getLocStart(),
+         "truncated string literal with embedded NUL character");
+  }
+}
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang

Added: clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h?rev=265691&view=auto
==============================================================================
--- clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h (added)
+++ clang-tools-extra/trunk/clang-tidy/misc/StringLiteralWithEmbeddedNulCheck.h Thu Apr  7 11:16:36 2016
@@ -0,0 +1,35 @@
+//===--- StringLiteralWithEmbeddedNulCheck.h - clang-tidy--------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H
+
+#include "../ClangTidy.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+/// Find suspicious string literals with embedded NUL characters.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html
+class StringLiteralWithEmbeddedNulCheck : public ClangTidyCheck {
+public:
+  StringLiteralWithEmbeddedNulCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_STRING_LITERAL_WITH_EMBEDDED_NUL_H

Modified: clang-tools-extra/trunk/docs/ReleaseNotes.rst
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/docs/ReleaseNotes.rst?rev=265691&r1=265690&r2=265691&view=diff
==============================================================================
--- clang-tools-extra/trunk/docs/ReleaseNotes.rst (original)
+++ clang-tools-extra/trunk/docs/ReleaseNotes.rst Thu Apr  7 11:16:36 2016
@@ -97,6 +97,12 @@ identified.  The improvements since the
   Warns when there is a explicit redundant cast of a calculation result to a
   bigger type.
 
+- New `misc-string-literal-with-embedded-nul
+  <http://clang.llvm.org/extra/clang-tidy/checks/misc-string-literal-with-embedded-nul.html>`_ check
+
+  Warns about suspicious NUL character in string literals which may lead to
+  truncation or invalid character escaping.
+
 - New `misc-suspicious-missing-comma
   <http://clang.llvm.org/extra/clang-tidy/checks/misc-suspicious-missing-comma.html>`_ check
 

Modified: clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst?rev=265691&r1=265690&r2=265691&view=diff
==============================================================================
--- clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst (original)
+++ clang-tools-extra/trunk/docs/clang-tidy/checks/list.rst Thu Apr  7 11:16:36 2016
@@ -66,6 +66,7 @@ Clang-Tidy Checks
    misc-sizeof-container
    misc-static-assert
    misc-string-integer-assignment
+   misc-string-literal-with-embedded-nul
    misc-suspicious-missing-comma
    misc-suspicious-semicolon
    misc-swapped-arguments

Added: clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst?rev=265691&view=auto
==============================================================================
--- clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst (added)
+++ clang-tools-extra/trunk/docs/clang-tidy/checks/misc-string-literal-with-embedded-nul.rst Thu Apr  7 11:16:36 2016
@@ -0,0 +1,38 @@
+.. title:: clang-tidy - misc-string-literal-with-embedded-nul
+
+misc-string-literal-with-embedded-nul
+=====================================
+
+Finds occurences of string literal with embedded NUL character and validates
+their usage.
+
+
+Invalid escaping
+^^^^^^^^^^^^^^^^
+
+Special characters can be escaped within a string literal by using their
+hexadecimal encoding like ``\x42``. A common mistake is to escape them
+like this ``\0x42`` where the ``\0`` stands for the NUL character.
+
+.. code:: c++
+
+  const char* Example[] = "Invalid character: \0x12 should be \x12";
+  const char* Bytes[] = "\x03\0x02\0x01\0x00\0xFF\0xFF\0xFF";
+
+
+Truncated literal
+^^^^^^^^^^^^^^^^^
+
+String-like classes can manipulate strings with embedded NUL as they are
+keeping track of the bytes and the length. This is not the case for a
+``char*`` (NUL-terminated) string.
+
+A common mistake is to pass a string-literal with embedded NUL to a string
+constructor expecting a NUL-terminated string. The bytes after the first NUL
+character are truncated.
+
+.. code:: c++
+
+  std::string str("abc\0def");  // "def" is truncated
+  str += "\0";                  // This statement is doing nothing
+  if (str == "\0abc") return;   // This expression is always true

Added: clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp?rev=265691&view=auto
==============================================================================
--- clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp (added)
+++ clang-tools-extra/trunk/test/clang-tidy/misc-string-literal-with-embedded-nul.cpp Thu Apr  7 11:16:36 2016
@@ -0,0 +1,85 @@
+// RUN: %check_clang_tidy %s misc-string-literal-with-embedded-nul %t
+
+namespace std {
+template <typename T>
+class allocator {};
+template <typename T>
+class char_traits {};
+template <typename C, typename T, typename A>
+struct basic_string {
+  typedef basic_string<C, T, A> _Type;
+  basic_string();
+  basic_string(const C *p, const A &a = A());
+
+  _Type& operator+=(const C* s);
+  _Type& operator=(const C* s);
+};
+
+typedef basic_string<char, std::char_traits<char>, std::allocator<char>> string;
+typedef basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t>> wstring;
+}
+
+bool operator==(const std::string&, const char*);
+bool operator==(const char*, const std::string&);
+
+
+const char Valid[] = "This is valid \x12.";
+const char Strange[] = "This is strange \0x12 and must be fixed";
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: suspicious embedded NUL character [misc-string-literal-with-embedded-nul]
+
+const char textA[] = "\0x01\0x02\0x03\0x04";
+// CHECK-MESSAGES: :[[@LINE-1]]:22: warning: suspicious embedded NUL character
+const wchar_t textW[] = L"\0x01\0x02\0x03\0x04";
+// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: suspicious embedded NUL character
+
+const char A[] = "\0";
+const char B[] = "\0x";
+const char C[] = "\0x1";
+const char D[] = "\0x11";
+// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character
+
+const wchar_t E[] = L"\0";
+const wchar_t F[] = L"\0x";
+const wchar_t G[] = L"\0x1";
+const wchar_t H[] = L"\0x11";
+// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: suspicious embedded NUL character
+
+const char I[] = "\000\000\000\000";
+const char J[] = "\0\0\0\0\0\0";
+const char K[] = "";
+
+const char L[] = "\0x12" "\0x12" "\0x12" "\0x12";
+// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: suspicious embedded NUL character
+
+void TestA() {
+  std::string str1 = "abc\0def";
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal
+  std::string str2 = "\0";
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: truncated string literal
+  std::string str3("\0");
+  // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal
+  std::string str4{"\x00\x01\x02\x03"};
+  // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: truncated string literal
+
+  std::string str;
+  str += "abc\0def";
+  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: truncated string literal
+  str = "abc\0def";
+  // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: truncated string literal
+
+  if (str == "abc\0def") return;
+  // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: truncated string literal
+  if ("abc\0def" == str) return;
+  // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: truncated string literal
+}
+
+void TestW() {
+  std::wstring str1 = L"abc\0def";
+  // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal
+  std::wstring str2 = L"\0";
+  // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: truncated string literal
+  std::wstring str3(L"\0");
+  // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal
+  std::wstring str4{L"\x00\x01\x02\x03"};
+  // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: truncated string literal
+}




More information about the cfe-commits mailing list