r279367 - [analyzer] Make CloneDetector consider macro expansions.

Artem Dergachev via cfe-commits cfe-commits at lists.llvm.org
Sat Aug 20 03:06:59 PDT 2016


Author: dergachev
Date: Sat Aug 20 05:06:59 2016
New Revision: 279367

URL: http://llvm.org/viewvc/llvm-project?rev=279367&view=rev
Log:
[analyzer] Make CloneDetector consider macro expansions.

So far macro-generated code was treated by the CloneDetector as normal code.
This caused that some macros where reported as false-positive clones because
large chunks of code coming from otherwise concise macro expansions were treated
as copy-pasted code.

This patch ensures that macros are treated in the same way as literals/function
calls. This prevents macros that expand into multiple statements
from being reported as clones.

Patch by Raphael Isemann!

Differential Revision: https://reviews.llvm.org/D23316

Added:
    cfe/trunk/test/Analysis/copypaste/macro-complexity.cpp
    cfe/trunk/test/Analysis/copypaste/macros.cpp
Modified:
    cfe/trunk/include/clang/Analysis/CloneDetection.h
    cfe/trunk/lib/Analysis/CloneDetection.cpp

Modified: cfe/trunk/include/clang/Analysis/CloneDetection.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Analysis/CloneDetection.h?rev=279367&r1=279366&r2=279367&view=diff
==============================================================================
--- cfe/trunk/include/clang/Analysis/CloneDetection.h (original)
+++ cfe/trunk/include/clang/Analysis/CloneDetection.h Sat Aug 20 05:06:59 2016
@@ -171,9 +171,16 @@ public:
 
     /// \brief The complexity of the StmtSequence.
     ///
-    /// This scalar value serves as a simple way of filtering clones that are
-    /// too small to be reported. A greater value indicates that the related
-    /// StmtSequence is probably more interesting to the user.
+    /// This value gives an approximation on how many direct or indirect child
+    /// statements are contained in the related StmtSequence. In general, the
+    /// greater this value, the greater the amount of statements. However, this
+    /// is only an approximation and the actual amount of statements can be
+    /// higher or lower than this value. Statements that are generated by the
+    /// compiler (e.g. macro expansions) for example barely influence the
+    /// complexity value.
+    ///
+    /// The main purpose of this value is to filter clones that are too small
+    /// and therefore probably not interesting enough for the user.
     unsigned Complexity;
 
     /// \brief Creates an empty CloneSignature without any data.

Modified: cfe/trunk/lib/Analysis/CloneDetection.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CloneDetection.cpp?rev=279367&r1=279366&r2=279367&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/CloneDetection.cpp (original)
+++ cfe/trunk/lib/Analysis/CloneDetection.cpp Sat Aug 20 05:06:59 2016
@@ -17,7 +17,9 @@
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/Lex/Lexer.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace clang;
 
@@ -239,6 +241,38 @@ public:
 };
 }
 
+/// \brief Prints the macro name that contains the given SourceLocation into
+///        the given raw_string_ostream.
+static void printMacroName(llvm::raw_string_ostream &MacroStack,
+                           ASTContext &Context, SourceLocation Loc) {
+  MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(),
+                                             Context.getLangOpts());
+
+  // Add an empty space at the end as a padding to prevent
+  // that macro names concatenate to the names of other macros.
+  MacroStack << " ";
+}
+
+/// \brief Returns a string that represents all macro expansions that
+///        expanded into the given SourceLocation.
+///
+/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations
+/// A and B are expanded from the same macros in the same order.
+static std::string getMacroStack(SourceLocation Loc, ASTContext &Context) {
+  std::string MacroStack;
+  llvm::raw_string_ostream MacroStackStream(MacroStack);
+  SourceManager &SM = Context.getSourceManager();
+
+  // Iterate over all macros that expanded into the given SourceLocation.
+  while (Loc.isMacroID()) {
+    // Add the macro name to the stream.
+    printMacroName(MacroStackStream, Context, Loc);
+    Loc = SM.getImmediateMacroCallerLoc(Loc);
+  }
+  MacroStackStream.flush();
+  return MacroStack;
+}
+
 namespace {
 /// \brief Collects the data of a single Stmt.
 ///
@@ -299,7 +333,13 @@ public:
     ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S);                      \
   }
 
-  DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); })
+  DEF_ADD_DATA(Stmt, {
+    addData(S->getStmtClass());
+    // This ensures that macro generated code isn't identical to macro-generated
+    // code.
+    addData(getMacroStack(S->getLocStart(), Context));
+    addData(getMacroStack(S->getLocEnd(), Context));
+  })
   DEF_ADD_DATA(Expr, { addData(S->getType()); })
 
   //--- Builtin functionality ----------------------------------------------//
@@ -434,14 +474,36 @@ class CloneSignatureGenerator {
   /// tree and stores them in the CloneDetector.
   ///
   /// \param S The root of the given statement tree.
+  /// \param ParentMacroStack A string representing the macros that generated
+  ///                         the parent statement or an empty string if no
+  ///                         macros generated the parent statement.
+  ///                         See getMacroStack() for generating such a string.
   /// \return The CloneSignature of the root statement.
-  CloneDetector::CloneSignature generateSignatures(const Stmt *S) {
+  CloneDetector::CloneSignature
+  generateSignatures(const Stmt *S, const std::string &ParentMacroStack) {
     // Create an empty signature that will be filled in this method.
     CloneDetector::CloneSignature Signature;
 
     // Collect all relevant data from S and put it into the empty signature.
     StmtDataCollector(S, Context, Signature.Data);
 
+    // Look up what macros expanded into the current statement.
+    std::string StartMacroStack = getMacroStack(S->getLocStart(), Context);
+    std::string EndMacroStack = getMacroStack(S->getLocEnd(), Context);
+
+    // First, check if ParentMacroStack is not empty which means we are currently
+    // dealing with a parent statement which was expanded from a macro.
+    // If this parent statement was expanded from the same macros as this
+    // statement, we reduce the initial complexity of this statement to zero.
+    // This causes that a group of statements that were generated by a single
+    // macro expansion will only increase the total complexity by one.
+    // Note: This is not the final complexity of this statement as we still
+    // add the complexity of the child statements to the complexity value.
+    if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack &&
+                                      EndMacroStack == ParentMacroStack)) {
+      Signature.Complexity = 0;
+    }
+
     // Storage for the signatures of the direct child statements. This is only
     // needed if the current statement is a CompoundStmt.
     std::vector<CloneDetector::CloneSignature> ChildSignatures;
@@ -457,7 +519,8 @@ class CloneSignatureGenerator {
 
       // Recursive call to create the signature of the child statement. This
       // will also create and store all clone groups in this child statement.
-      auto ChildSignature = generateSignatures(Child);
+      // We pass only the StartMacroStack along to keep things simple.
+      auto ChildSignature = generateSignatures(Child, StartMacroStack);
 
       // Add the collected data to the signature of the current statement.
       Signature.add(ChildSignature);
@@ -518,7 +581,7 @@ public:
       : CD(CD), Context(Context) {}
 
   /// \brief Generates signatures for all statements in the given function body.
-  void consumeCodeBody(const Stmt *S) { generateSignatures(S); }
+  void consumeCodeBody(const Stmt *S) { generateSignatures(S, ""); }
 };
 } // end anonymous namespace
 

Added: cfe/trunk/test/Analysis/copypaste/macro-complexity.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/macro-complexity.cpp?rev=279367&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/macro-complexity.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/macro-complexity.cpp Sat Aug 20 05:06:59 2016
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:MinimumCloneComplexity=10 -verify %s
+
+// Tests that the complexity value of a macro expansion is about the same as
+// the complexity value of a normal function call and the the macro body doesn't
+// influence the complexity. See the CloneSignature class in CloneDetection.h
+// for more information about complexity values of clones.
+
+#define MACRO_FOO(a, b) a > b ? -a * a : -b * b;
+
+// First, manually apply MACRO_FOO and see if the code gets detected as a clone.
+// This confirms that with the current configuration the macro body would be
+// considered large enough to pass the MinimumCloneComplexity constraint.
+
+int manualMacro(int a, int b) { // expected-warning{{Detected code clone.}}
+  return a > b ? -a * a : -b * b;
+}
+
+int manualMacroClone(int a, int b) { // expected-note{{Related code clone is here.}}
+  return a > b ? -a * a : -b * b;
+}
+
+// Now we actually use the macro to generate the same AST as above. They
+// shouldn't be reported because the macros only slighly increase the complexity
+// value and the resulting code will never pass the MinimumCloneComplexity
+// constraint.
+
+int macro(int a, int b) {
+  return MACRO_FOO(a, b);
+}
+
+int macroClone(int a, int b) {
+  return MACRO_FOO(a, b);
+}
+
+// So far we only tested that macros increase the complexity by a lesser amount
+// than normal code. We also need to be sure this amount is not zero because
+// we otherwise macro code would be 'invisible' for the CloneDetector.
+// This tests that it is possible to increase the reach the minimum complexity
+// by only using macros. This is only possible if the complexity value is bigger
+// than zero.
+
+#define NEG(A) -(A)
+
+int nestedMacros() { // expected-warning{{Detected code clone.}}
+  return NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(1))))))))));
+}
+
+int nestedMacrosClone() { // expected-note{{Related code clone is here.}}
+  return NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(NEG(1))))))))));
+}

Added: cfe/trunk/test/Analysis/copypaste/macros.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/macros.cpp?rev=279367&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/macros.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/macros.cpp Sat Aug 20 05:06:59 2016
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// Tests that macros and non-macro clones aren't mixed into the same hash
+// group. This is currently necessary as all clones in a hash group need
+// to have the same complexity value. Macros have smaller complexity values
+// and need to be in their own hash group.
+
+int foo(int a) { // expected-warning{{Detected code clone.}}
+  a = a + 1;
+  a = a + 1 / 1;
+  a = a + 1 + 1 + 1;
+  a = a + 1 - 1 + 1 + 1;
+  a = a + 1 * 1 + 1 + 1 + 1;
+  a = a + 1 / 1 + 1 + 1 + 1;
+  return a;
+}
+
+int fooClone(int a) { // expected-note{{Related code clone is here.}}
+  a = a + 1;
+  a = a + 1 / 1;
+  a = a + 1 + 1 + 1;
+  a = a + 1 - 1 + 1 + 1;
+  a = a + 1 * 1 + 1 + 1 + 1;
+  a = a + 1 / 1 + 1 + 1 + 1;
+  return a;
+}
+
+// Below is the same AST as above but this time generated with macros. The
+// clones below should land in their own hash group for the reasons given above.
+
+#define ASSIGN(T, V) T = T + V
+
+int macro(int a) { // expected-warning{{Detected code clone.}}
+  ASSIGN(a, 1);
+  ASSIGN(a, 1 / 1);
+  ASSIGN(a, 1 + 1 + 1);
+  ASSIGN(a, 1 - 1 + 1 + 1);
+  ASSIGN(a, 1 * 1 + 1 + 1 + 1);
+  ASSIGN(a, 1 / 1 + 1 + 1 + 1);
+  return a;
+}
+
+int macroClone(int a) { // expected-note{{Related code clone is here.}}
+  ASSIGN(a, 1);
+  ASSIGN(a, 1 / 1);
+  ASSIGN(a, 1 + 1 + 1);
+  ASSIGN(a, 1 - 1 + 1 + 1);
+  ASSIGN(a, 1 * 1 + 1 + 1 + 1);
+  ASSIGN(a, 1 / 1 + 1 + 1 + 1);
+  return a;
+}
+
+// FIXME: Macros with empty definitions in the AST are currently ignored.
+
+#define EMPTY
+
+int fooFalsePositiveClone(int a) { // expected-note{{Related code clone is here.}}
+  a = EMPTY a + 1;
+  a = a + 1 / 1;
+  a = a + 1 + 1 + 1;
+  a = a + 1 - 1 + 1 + 1;
+  a = a + 1 * 1 + 1 + 1 + 1;
+  a = a + 1 / 1 + 1 + 1 + 1;
+  return a;
+}
+
+




More information about the cfe-commits mailing list