[clang-tools-extra] 04410c5 - [clang-tidy] extend bugprone-signed-char-misuse check.

Tamás Zolnai via cfe-commits cfe-commits at lists.llvm.org
Sat Mar 14 12:04:26 PDT 2020


Author: Tamás Zolnai
Date: 2020-03-14T20:00:00+01:00
New Revision: 04410c565aa08b703ef5d11b454e7fba47163e3c

URL: https://github.com/llvm/llvm-project/commit/04410c565aa08b703ef5d11b454e7fba47163e3c
DIFF: https://github.com/llvm/llvm-project/commit/04410c565aa08b703ef5d11b454e7fba47163e3c.diff

LOG: [clang-tidy] extend bugprone-signed-char-misuse check.

Summary:
Cover a new use case when using a 'signed char' as an integer
might lead to issue with non-ASCII characters. Comparing
a 'signed char' with an 'unsigned char' using equality / unequality
operator produces an unexpected result for non-ASCII characters.

Reviewers: aaron.ballman, alexfh, hokein, njames93

Reviewed By: njames93

Subscribers: xazax.hun, cfe-commits

Tags: #clang, #clang-tools-extra

Differential Revision: https://reviews.llvm.org/D75749

Added: 
    

Modified: 
    clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp
    clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h
    clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst
    clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp
index 32949a878497..732ccbc9dd2a 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp
@@ -18,6 +18,8 @@ namespace clang {
 namespace tidy {
 namespace bugprone {
 
+static constexpr int UnsignedASCIIUpperBound = 127;
+
 static Matcher<TypedefDecl> hasAnyListedName(const std::string &Names) {
   const std::vector<std::string> NameList =
       utils::options::parseStringList(Names);
@@ -33,25 +35,29 @@ void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
   Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList);
 }
 
-void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
+// Create a matcher for char -> integer cast.
+BindableMatcher<clang::Stmt> SignedCharMisuseCheck::charCastExpression(
+    bool IsSigned, const Matcher<clang::QualType> &IntegerType,
+    const std::string &CastBindName) const {
   // We can ignore typedefs which are some kind of integer types
   // (e.g. typedef char sal_Int8). In this case, we don't need to
   // worry about the misinterpretation of char values.
   const auto IntTypedef = qualType(
       hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList))));
 
-  const auto SignedCharType = expr(hasType(qualType(
-      allOf(isAnyCharacter(), isSignedInteger(), unless(IntTypedef)))));
-
-  const auto IntegerType = qualType(allOf(isInteger(), unless(isAnyCharacter()),
-                                          unless(booleanType())))
-                               .bind("integerType");
+  auto CharTypeExpr = expr();
+  if (IsSigned) {
+    CharTypeExpr = expr(hasType(
+        qualType(isAnyCharacter(), isSignedInteger(), unless(IntTypedef))));
+  } else {
+    CharTypeExpr = expr(hasType(qualType(
+        isAnyCharacter(), unless(isSignedInteger()), unless(IntTypedef))));
+  }
 
-  // We are interested in signed char -> integer conversion.
   const auto ImplicitCastExpr =
-      implicitCastExpr(hasSourceExpression(SignedCharType),
+      implicitCastExpr(hasSourceExpression(CharTypeExpr),
                        hasImplicitDestinationType(IntegerType))
-          .bind("castExpression");
+          .bind(CastBindName);
 
   const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr));
   const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr));
@@ -59,44 +65,84 @@ void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
 
   // We catch any type of casts to an integer. We need to have these cast
   // expressions explicitly to catch only those casts which are direct children
-  // of an assignment/declaration.
-  const auto CastExpr = expr(anyOf(ImplicitCastExpr, CStyleCastExpr,
-                                   StaticCastExpr, FunctionalCastExpr));
+  // of the checked expressions. (e.g. assignment, declaration).
+  return expr(anyOf(ImplicitCastExpr, CStyleCastExpr, StaticCastExpr,
+                    FunctionalCastExpr));
+}
 
-  // Catch assignments with the suspicious type conversion.
-  const auto AssignmentOperatorExpr = expr(binaryOperator(
-      hasOperatorName("="), hasLHS(hasType(IntegerType)), hasRHS(CastExpr)));
+void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
+  const auto IntegerType =
+      qualType(isInteger(), unless(isAnyCharacter()), unless(booleanType()))
+          .bind("integerType");
+  const auto SignedCharCastExpr =
+      charCastExpression(true, IntegerType, "signedCastExpression");
+  const auto UnSignedCharCastExpr =
+      charCastExpression(false, IntegerType, "unsignedCastExpression");
+
+  // Catch assignments with singed char -> integer conversion.
+  const auto AssignmentOperatorExpr =
+      expr(binaryOperator(hasOperatorName("="), hasLHS(hasType(IntegerType)),
+                          hasRHS(SignedCharCastExpr)));
 
   Finder->addMatcher(AssignmentOperatorExpr, this);
 
-  // Catch declarations with the suspicious type conversion.
-  const auto Declaration =
-      varDecl(isDefinition(), hasType(IntegerType), hasInitializer(CastExpr));
+  // Catch declarations with singed char -> integer conversion.
+  const auto Declaration = varDecl(isDefinition(), hasType(IntegerType),
+                                   hasInitializer(SignedCharCastExpr));
 
   Finder->addMatcher(Declaration, this);
+
+  // Catch signed char/unsigned char comparison.
+  const auto CompareOperator =
+      expr(binaryOperator(hasAnyOperatorName("==", "!="),
+                          anyOf(allOf(hasLHS(SignedCharCastExpr),
+                                      hasRHS(UnSignedCharCastExpr)),
+                                allOf(hasLHS(UnSignedCharCastExpr),
+                                      hasRHS(SignedCharCastExpr)))))
+          .bind("comparison");
+
+  Finder->addMatcher(CompareOperator, this);
 }
 
 void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) {
-  const auto *CastExpression =
-      Result.Nodes.getNodeAs<ImplicitCastExpr>("castExpression");
-  const auto *IntegerType = Result.Nodes.getNodeAs<QualType>("integerType");
-  assert(CastExpression);
-  assert(IntegerType);
+  const auto *SignedCastExpression =
+      Result.Nodes.getNodeAs<ImplicitCastExpr>("signedCastExpression");
 
-  // Ignore the match if we know that the value is not negative.
+  // Ignore the match if we know that the signed char's value is not negative.
   // The potential misinterpretation happens for negative values only.
   Expr::EvalResult EVResult;
-  if (!CastExpression->isValueDependent() &&
-      CastExpression->getSubExpr()->EvaluateAsInt(EVResult, *Result.Context)) {
-    llvm::APSInt Value1 = EVResult.Val.getInt();
-    if (Value1.isNonNegative())
+  if (!SignedCastExpression->isValueDependent() &&
+      SignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult,
+                                                        *Result.Context)) {
+    llvm::APSInt Value = EVResult.Val.getInt();
+    if (Value.isNonNegative())
       return;
   }
 
-  diag(CastExpression->getBeginLoc(),
-       "'signed char' to %0 conversion; "
-       "consider casting to 'unsigned char' first.")
-      << *IntegerType;
+  if (const auto *Comparison = Result.Nodes.getNodeAs<Expr>("comparison")) {
+    const auto *UnSignedCastExpression =
+        Result.Nodes.getNodeAs<ImplicitCastExpr>("unsignedCastExpression");
+
+    // We can ignore the ASCII value range also for unsigned char.
+    Expr::EvalResult EVResult;
+    if (!UnSignedCastExpression->isValueDependent() &&
+        UnSignedCastExpression->getSubExpr()->EvaluateAsInt(EVResult,
+                                                            *Result.Context)) {
+      llvm::APSInt Value = EVResult.Val.getInt();
+      if (Value <= UnsignedASCIIUpperBound)
+        return;
+    }
+
+    diag(Comparison->getBeginLoc(),
+         "comparison between 'signed char' and 'unsigned char'");
+  } else if (const auto *IntegerType =
+                 Result.Nodes.getNodeAs<QualType>("integerType")) {
+    diag(SignedCastExpression->getBeginLoc(),
+         "'signed char' to %0 conversion; "
+         "consider casting to 'unsigned char' first.")
+        << *IntegerType;
+  } else
+    llvm_unreachable("Unexpected match");
 }
 
 } // namespace bugprone

diff  --git a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h
index 4ce9895a8322..3fa0c9c0a088 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h
@@ -15,13 +15,11 @@ namespace clang {
 namespace tidy {
 namespace bugprone {
 
-/// Finds ``signed char`` -> integer conversions which might indicate a programming
-/// error. The basic problem with the ``signed char``, that it might store the
-/// non-ASCII characters as negative values. The human programmer probably
-/// expects that after an integer conversion the converted value matches with the
-/// character code (a value from [0..255]), however, the actual value is in
-/// [-128..127] interval. This also applies to the plain ``char`` type on
-/// those implementations which represent ``char`` similar to ``signed char``.
+/// Finds those ``signed char`` -> integer conversions which might indicate a
+/// programming error. The basic problem with the ``signed char``, that it might
+/// store the non-ASCII characters as negative values. This behavior can cause a
+/// misunderstanding of the written code both when an explicit and when an
+/// implicit conversion happens.
 ///
 /// For the user-facing documentation see:
 /// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html
@@ -34,6 +32,11 @@ class SignedCharMisuseCheck : public ClangTidyCheck {
   void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
 
 private:
+  ast_matchers::internal::BindableMatcher<clang::Stmt> charCastExpression(
+      bool IsSigned,
+      const ast_matchers::internal::Matcher<clang::QualType> &IntegerType,
+      const std::string &CastBindName) const;
+
   const std::string CharTypdefsToIgnoreList;
 };
 

diff  --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst
index 20187b160e81..e3ecf75a3a52 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst
@@ -3,27 +3,39 @@
 bugprone-signed-char-misuse
 ===========================
 
-Finds ``signed char`` -> integer conversions which might indicate a programming
-error. The basic problem with the ``signed char``, that it might store the
-non-ASCII characters as negative values. The human programmer probably
-expects that after an integer conversion the converted value matches with the
+Finds those ``signed char`` -> integer conversions which might indicate a
+programming error. The basic problem with the ``signed char``, that it might
+store the non-ASCII characters as negative values. This behavior can cause a
+misunderstanding of the written code both when an explicit and when an
+implicit conversion happens.
+
+When the code contains an explicit ``signed char`` -> integer conversion, the
+human programmer probably expects that the converted value matches with the
 character code (a value from [0..255]), however, the actual value is in
-[-128..127] interval. This also applies to the plain ``char`` type on
-those implementations which represent ``char`` similar to ``signed char``.
-
-To avoid this kind of misinterpretation, the desired way of converting from a
-``signed char`` to an integer value is converting to ``unsigned char`` first,
-which stores all the characters in the positive [0..255] interval which matches
-with the known character codes.
-
-It depends on the actual platform whether ``char`` is handled as ``signed char``
+[-128..127] interval. To avoid this kind of misinterpretation, the desired way
+of converting from a ``signed char`` to an integer value is converting to
+``unsigned char`` first, which stores all the characters in the positive [0..255]
+interval which matches the known character codes.
+
+In case of implicit conversion, the programmer might not actually be aware
+that a conversion happened and char value is used as an integer. There are
+some use cases when this unawareness might lead to a functionally imperfect code.
+For example, checking the equality of a ``signed char`` and an ``unsigned char``
+variable is something we should avoid in C++ code. During this comparison,
+the two variables are converted to integers which have 
diff erent value ranges.
+For ``signed char``, the non-ASCII characters are stored as a value in [-128..-1]
+interval, while the same characters are stored in the [128..255] interval for
+an ``unsigned char``.
+
+It depends on the actual platform whether plain ``char`` is handled as ``signed char``
 by default and so it is caught by this check or not. To change the default behavior
 you can use ``-funsigned-char`` and ``-fsigned-char`` compilation options.
 
 Currently, this check is limited to assignments and variable declarations,
-where a ``signed char`` is assigned to an integer variable. There are other
-use cases where the same misinterpretation might lead to similar bogus
-behavior.
+where a ``signed char`` is assigned to an integer variable and to
+equality/inequality comparisons between ``signed char`` and ``unsigned char``.
+There are other use cases where the unexpected value ranges might lead to
+similar bogus behavior.
 
 See also:
 `STR34-C. Cast characters to unsigned char before converting to larger integer sizes
@@ -67,6 +79,29 @@ an ``unsigned char`` value first.
     return IChar;
   }
 
+Another use case is checking the equality of two ``char`` variables with
+
diff erent signedness. Inside the non-ASCII value range this comparison between
+a ``signed char`` and an ``unsigned char`` always returns ``false``.
+
+.. code-block:: c++
+
+  bool compare(signed char SChar, unsigned char USChar) {
+    if (SChar == USChar)
+      return true;
+    return false;
+  }
+
+The easiest way to fix this kind of comparison is casting one of the arguments,
+so both arguments will have the same type.
+
+.. code-block:: c++
+
+  bool compare(signed char SChar, unsigned char USChar) {
+    if (static_cast<unsigned char>(SChar) == USChar)
+      return true;
+    return false;
+  }
+
 .. option:: CharTypdefsToIgnore
 
   A semicolon-separated list of typedef names. In this list, we can list

diff  --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp
index f3f1a1d8537e..ef42b0c85ae6 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse.cpp
@@ -62,6 +62,34 @@ int CharPointer(signed char *CCharacter) {
   return NCharacter;
 }
 
+int SignedUnsignedCharEquality(signed char SCharacter) {
+  unsigned char USCharacter = 'a';
+  if (SCharacter == USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
+    return 1;
+  return 0;
+}
+
+int SignedUnsignedCharIneqiality(signed char SCharacter) {
+  unsigned char USCharacter = 'a';
+  if (SCharacter != USCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
+    return 1;
+  return 0;
+}
+
+int CompareWithNonAsciiConstant(unsigned char USCharacter) {
+  const signed char SCharacter = -5;
+  if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
+    return 1;
+  return 0;
+}
+
+int CompareWithUnsignedNonAsciiConstant(signed char SCharacter) {
+  const unsigned char USCharacter = 128;
+  if (USCharacter == SCharacter) // CHECK-MESSAGES: [[@LINE]]:7: warning: comparison between 'signed char' and 'unsigned char' [bugprone-signed-char-misuse]
+    return 1;
+  return 0;
+}
+
 ///////////////////////////////////////////////////////////////////
 /// Test cases correctly ignored by the check.
 
@@ -121,3 +149,61 @@ unsigned char CharToCharCast() {
 
   return USCharacter;
 }
+
+int FixComparisonWithSignedCharCast(signed char SCharacter) {
+  unsigned char USCharacter = 'a';
+  if (SCharacter == static_cast<signed char>(USCharacter))
+    return 1;
+  return 0;
+}
+
+int FixComparisonWithUnSignedCharCast(signed char SCharacter) {
+  unsigned char USCharacter = 'a';
+  if (static_cast<unsigned char>(SCharacter) == USCharacter)
+    return 1;
+  return 0;
+}
+
+// Make sure we don't catch other type of char comparison.
+int SameCharTypeComparison(signed char SCharacter) {
+  signed char SCharacter2 = 'a';
+  if (SCharacter == SCharacter2)
+    return 1;
+  return 0;
+}
+
+// Make sure we don't catch other type of char comparison.
+int SameCharTypeComparison2(unsigned char USCharacter) {
+  unsigned char USCharacter2 = 'a';
+  if (USCharacter == USCharacter2)
+    return 1;
+  return 0;
+}
+
+// Make sure we don't catch integer - char comparison.
+int CharIntComparison(signed char SCharacter) {
+  int ICharacter = 10;
+  if (SCharacter == ICharacter)
+    return 1;
+  return 0;
+}
+
+int CompareWithAsciiLiteral(unsigned char USCharacter) {
+  if (USCharacter == 'x') // no warning
+    return 1;
+  return 0;
+}
+
+int CompareWithAsciiConstant(unsigned char USCharacter) {
+  const signed char SCharacter = 'a';
+  if (USCharacter == SCharacter)
+    return 1;
+  return 0;
+}
+
+int CompareWithUnsignedAsciiConstant(signed char SCharacter) {
+  const unsigned char USCharacter = 'a';
+  if (USCharacter == SCharacter)
+    return 1;
+  return 0;
+}


        


More information about the cfe-commits mailing list