r276782 - [analyzer] Add basic capabilities to detect source code clones.

Artem Dergachev via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 26 11:13:12 PDT 2016


Author: dergachev
Date: Tue Jul 26 13:13:12 2016
New Revision: 276782

URL: http://llvm.org/viewvc/llvm-project?rev=276782&view=rev
Log:
[analyzer] Add basic capabilities to detect source code clones.

This patch adds the CloneDetector class which allows searching source code
for clones.

For every statement or group of statements within a compound statement,
CloneDetector computes a hash value, and finds clones by detecting
identical hash values.

This initial patch only provides a simple hashing mechanism
that hashes the kind of each sub-statement.

This patch also adds CloneChecker - a simple static analyzer checker
that uses CloneDetector to report copy-pasted code.

Patch by Raphael Isemann!

Differential Revision: https://reviews.llvm.org/D20795

Added:
    cfe/trunk/include/clang/Analysis/CloneDetection.h
    cfe/trunk/lib/Analysis/CloneDetection.cpp
    cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
    cfe/trunk/test/Analysis/copypaste/
    cfe/trunk/test/Analysis/copypaste/blocks.cpp
    cfe/trunk/test/Analysis/copypaste/false-positives.cpp
    cfe/trunk/test/Analysis/copypaste/functions.cpp
    cfe/trunk/test/Analysis/copypaste/objc-methods.m
    cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp
Modified:
    cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
    cfe/trunk/lib/Analysis/CMakeLists.txt
    cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt

Added: cfe/trunk/include/clang/Analysis/CloneDetection.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Analysis/CloneDetection.h?rev=276782&view=auto
==============================================================================
--- cfe/trunk/include/clang/Analysis/CloneDetection.h (added)
+++ cfe/trunk/include/clang/Analysis/CloneDetection.h Tue Jul 26 13:13:12 2016
@@ -0,0 +1,235 @@
+//===--- CloneDetection.h - Finds code clones in an AST ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// /file
+/// This file defines classes for searching and anlyzing source code clones.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_CLONEDETECTION_H
+#define LLVM_CLANG_AST_CLONEDETECTION_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/StringMap.h"
+
+#include <vector>
+
+namespace clang {
+
+class Stmt;
+class Decl;
+class ASTContext;
+class CompoundStmt;
+
+/// \brief Identifies a list of statements.
+///
+/// Can either identify a single arbitrary Stmt object, a continuous sequence of
+/// child statements inside a CompoundStmt or no statements at all.
+class StmtSequence {
+  /// If this object identifies a sequence of statements inside a CompoundStmt,
+  /// S points to this CompoundStmt. If this object only identifies a single
+  /// Stmt, then S is a pointer to this Stmt.
+  const Stmt *S;
+
+  /// The related ASTContext for S.
+  ASTContext *Context;
+
+  /// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence
+  /// instance is representing the CompoundStmt children inside the array
+  /// [StartIndex, EndIndex).
+  unsigned StartIndex;
+  unsigned EndIndex;
+
+public:
+  /// \brief Constructs a StmtSequence holding multiple statements.
+  ///
+  /// The resulting StmtSequence identifies a continuous sequence of statements
+  /// in the body of the given CompoundStmt. Which statements of the body should
+  /// be identified needs to be specified by providing a start and end index
+  /// that describe a non-empty sub-array in the body of the given CompoundStmt.
+  ///
+  /// \param Stmt A CompoundStmt that contains all statements in its body.
+  /// \param Context The ASTContext for the given CompoundStmt.
+  /// \param StartIndex The inclusive start index in the children array of
+  ///                   \p Stmt
+  /// \param EndIndex The exclusive end index in the children array of \p Stmt.
+  StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,
+               unsigned StartIndex, unsigned EndIndex);
+
+  /// \brief Constructs a StmtSequence holding a single statement.
+  ///
+  /// \param Stmt An arbitrary Stmt.
+  /// \param Context The ASTContext for the given Stmt.
+  StmtSequence(const Stmt *Stmt, ASTContext &Context);
+
+  /// \brief Constructs an empty StmtSequence.
+  StmtSequence();
+
+  typedef const Stmt *const *iterator;
+
+  /// Returns an iterator pointing to the first statement in this sequence.
+  iterator begin() const;
+
+  /// Returns an iterator pointing behind the last statement in this sequence.
+  iterator end() const;
+
+  /// Returns the first statement in this sequence.
+  ///
+  /// This method should only be called on a non-empty StmtSequence object.
+  const Stmt *front() const {
+    assert(!empty());
+    return begin()[0];
+  }
+
+  /// Returns the last statement in this sequence.
+  ///
+  /// This method should only be called on a non-empty StmtSequence object.
+  const Stmt *back() const {
+    assert(!empty());
+    return begin()[size() - 1];
+  }
+
+  /// Returns the number of statements this object holds.
+  unsigned size() const {
+    if (holdsSequence())
+      return EndIndex - StartIndex;
+    if (S == nullptr)
+      return 0;
+    return 1;
+  }
+
+  /// Returns true if and only if this StmtSequence contains no statements.
+  bool empty() const { return size() == 0; }
+
+  /// Returns the related ASTContext for the stored Stmts.
+  ASTContext &getASTContext() const {
+    assert(Context);
+    return *Context;
+  }
+
+  /// Returns true if this objects holds a list of statements.
+  bool holdsSequence() const { return EndIndex != 0; }
+
+  /// Returns the start sourcelocation of the first statement in this sequence.
+  ///
+  /// This method should only be called on a non-empty StmtSequence object.
+  SourceLocation getStartLoc() const;
+
+  /// Returns the end sourcelocation of the last statement in this sequence.
+  ///
+  /// This method should only be called on a non-empty StmtSequence object.
+  SourceLocation getEndLoc() const;
+
+  bool operator==(const StmtSequence &Other) const {
+    return std::tie(S, StartIndex, EndIndex) ==
+           std::tie(Other.S, Other.StartIndex, Other.EndIndex);
+  }
+
+  bool operator!=(const StmtSequence &Other) const {
+    return std::tie(S, StartIndex, EndIndex) !=
+           std::tie(Other.S, Other.StartIndex, Other.EndIndex);
+  }
+
+  /// Returns true if and only if this sequence covers a source range that
+  /// contains the source range of the given sequence \p Other.
+  ///
+  /// This method should only be called on a non-empty StmtSequence object
+  /// and passed a non-empty StmtSequence object.
+  bool contains(const StmtSequence &Other) const;
+};
+
+/// \brief Searches for clones in source code.
+///
+/// First, this class needs a translation unit which is passed via
+/// \p analyzeTranslationUnit . It will then generate and store search data
+/// for all statements inside the given translation unit.
+/// Afterwards the generated data can be used to find code clones by calling
+/// \p findClones .
+///
+/// This class only searches for clones in exectuable source code
+/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations)
+/// are not supported.
+class CloneDetector {
+public:
+  /// Holds the data about a StmtSequence that is needed during the search for
+  /// code clones.
+  struct CloneSignature {
+    /// \brief Holds all relevant data of a StmtSequence.
+    ///
+    /// If this variable is equal for two different StmtSequences, then they can
+    /// be considered clones of each other.
+    std::vector<unsigned> Data;
+
+    /// \brief The complexity of the StmtSequence.
+    ///
+    /// This scalar value serves as a simple way of filtering clones that are
+    /// too small to be reported. A greater value indicates that the related
+    /// StmtSequence is probably more interesting to the user.
+    unsigned Complexity;
+
+    /// \brief Creates an empty CloneSignature without any data.
+    CloneSignature() : Complexity(1) {}
+
+    CloneSignature(const std::vector<unsigned> &Data, unsigned Complexity)
+        : Data(Data), Complexity(Complexity) {}
+
+    /// \brief Adds the data from the given CloneSignature to this one.
+    void add(const CloneSignature &Other) {
+      Data.insert(Data.end(), Other.Data.begin(), Other.Data.end());
+      Complexity += Other.Complexity;
+    }
+  };
+
+  /// Holds group of StmtSequences that are clones of each other and the
+  /// complexity value (see CloneSignature::Complexity) that all stored
+  /// StmtSequences have in common.
+  struct CloneGroup {
+    std::vector<StmtSequence> Sequences;
+    unsigned Complexity;
+
+    CloneGroup(const StmtSequence &Seq, unsigned Complexity)
+        : Complexity(Complexity) {
+      Sequences.push_back(Seq);
+    }
+
+    /// \brief Returns false if and only if this group should be skipped when
+    ///        searching for clones.
+    bool isValid() const {
+      // A clone group with only one member makes no sense, so we skip them.
+      return Sequences.size() > 1;
+    }
+  };
+
+  /// \brief Generates and stores search data for all statements in the body of
+  ///        the given Decl.
+  void analyzeCodeBody(const Decl *D);
+
+  /// \brief Stores the CloneSignature to allow future querying.
+  void add(const StmtSequence &S, const CloneSignature &Signature);
+
+  /// \brief Searches the provided statements for clones.
+  ///
+  /// \param Result Output parameter that is filled with a list of found
+  ///               clone groups. Each group contains multiple StmtSequences
+  ///               that were identified to be clones of each other.
+  /// \param MinGroupComplexity Only return clones which have at least this
+  ///                           complexity value.
+  void findClones(std::vector<CloneGroup> &Result, unsigned MinGroupComplexity);
+
+private:
+  /// Stores all found clone groups including invalid groups with only a single
+  /// statement.
+  std::vector<CloneGroup> CloneGroups;
+  /// Maps search data to its related index in the \p CloneGroups vector.
+  llvm::StringMap<std::size_t> CloneGroupIndexes;
+};
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_AST_CLONEDETECTION_H

Modified: cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td?rev=276782&r1=276781&r2=276782&view=diff
==============================================================================
--- cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td (original)
+++ cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td Tue Jul 26 13:13:12 2016
@@ -77,6 +77,8 @@ def MPI : Package<"mpi">, InPackage<OptI
 def LLVM : Package<"llvm">;
 def Debug : Package<"debug">;
 
+def CloneDetectionAlpha : Package<"clone">, InPackage<Alpha>, Hidden;
+
 //===----------------------------------------------------------------------===//
 // Core Checkers.
 //===----------------------------------------------------------------------===//
@@ -661,3 +663,17 @@ def BugHashDumper : Checker<"DumpBugHash
   DescFile<"DebugCheckers.cpp">;
 
 } // end "debug"
+
+
+//===----------------------------------------------------------------------===//
+// Clone Detection
+//===----------------------------------------------------------------------===//
+
+let ParentPackage = CloneDetectionAlpha in {
+
+def CloneChecker : Checker<"CloneChecker">,
+  HelpText<"Reports similar pieces of code.">,
+  DescFile<"CloneChecker.cpp">;
+
+} // end "clone"
+

Modified: cfe/trunk/lib/Analysis/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CMakeLists.txt?rev=276782&r1=276781&r2=276782&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/CMakeLists.txt (original)
+++ cfe/trunk/lib/Analysis/CMakeLists.txt Tue Jul 26 13:13:12 2016
@@ -9,6 +9,7 @@ add_clang_library(clangAnalysis
   CFGReachabilityAnalysis.cpp
   CFGStmtMap.cpp
   CallGraph.cpp
+  CloneDetection.cpp
   CocoaConventions.cpp
   Consumed.cpp
   CodeInjector.cpp

Added: cfe/trunk/lib/Analysis/CloneDetection.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CloneDetection.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/lib/Analysis/CloneDetection.cpp (added)
+++ cfe/trunk/lib/Analysis/CloneDetection.cpp Tue Jul 26 13:13:12 2016
@@ -0,0 +1,277 @@
+//===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+///  This file implements classes for searching and anlyzing source code clones.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/CloneDetection.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace clang;
+
+StmtSequence::StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,
+                           unsigned StartIndex, unsigned EndIndex)
+    : S(Stmt), Context(&Context), StartIndex(StartIndex), EndIndex(EndIndex) {
+  assert(Stmt && "Stmt must not be a nullptr");
+  assert(StartIndex < EndIndex && "Given array should not be empty");
+  assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt");
+}
+
+StmtSequence::StmtSequence(const Stmt *Stmt, ASTContext &Context)
+    : S(Stmt), Context(&Context), StartIndex(0), EndIndex(0) {}
+
+StmtSequence::StmtSequence()
+    : S(nullptr), Context(nullptr), StartIndex(0), EndIndex(0) {}
+
+bool StmtSequence::contains(const StmtSequence &Other) const {
+  // If both sequences reside in different translation units, they can never
+  // contain each other.
+  if (Context != Other.Context)
+    return false;
+
+  const SourceManager &SM = Context->getSourceManager();
+
+  // Otherwise check if the start and end locations of the current sequence
+  // surround the other sequence.
+  bool StartIsInBounds =
+      SM.isBeforeInTranslationUnit(getStartLoc(), Other.getStartLoc()) ||
+      getStartLoc() == Other.getStartLoc();
+  if (!StartIsInBounds)
+    return false;
+
+  bool EndIsInBounds =
+      SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) ||
+      Other.getEndLoc() == getEndLoc();
+  return EndIsInBounds;
+}
+
+StmtSequence::iterator StmtSequence::begin() const {
+  if (!holdsSequence()) {
+    return &S;
+  }
+  auto CS = cast<CompoundStmt>(S);
+  return CS->body_begin() + StartIndex;
+}
+
+StmtSequence::iterator StmtSequence::end() const {
+  if (!holdsSequence()) {
+    return &S + 1;
+  }
+  auto CS = cast<CompoundStmt>(S);
+  return CS->body_begin() + EndIndex;
+}
+
+SourceLocation StmtSequence::getStartLoc() const {
+  return front()->getLocStart();
+}
+
+SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
+
+namespace {
+/// Generates CloneSignatures for a set of statements and stores the results in
+/// a CloneDetector object.
+class CloneSignatureGenerator {
+
+  CloneDetector &CD;
+  ASTContext &Context;
+
+  /// \brief Generates CloneSignatures for all statements in the given statement
+  /// tree and stores them in the CloneDetector.
+  ///
+  /// \param S The root of the given statement tree.
+  /// \return The CloneSignature of the root statement.
+  CloneDetector::CloneSignature generateSignatures(const Stmt *S) {
+    // Create an empty signature that will be filled in this method.
+    CloneDetector::CloneSignature Signature;
+
+    // The only relevant data for now is the class of the statement.
+    // TODO: Collect statement class specific data.
+    Signature.Data.push_back(S->getStmtClass());
+
+    // Storage for the signatures of the direct child statements. This is only
+    // needed if the current statement is a CompoundStmt.
+    std::vector<CloneDetector::CloneSignature> ChildSignatures;
+    const CompoundStmt *CS = dyn_cast<const CompoundStmt>(S);
+
+    // The signature of a statement includes the signatures of its children.
+    // Therefore we create the signatures for every child and add them to the
+    // current signature.
+    for (const Stmt *Child : S->children()) {
+      // Some statements like 'if' can have nullptr children that we will skip.
+      if (!Child)
+        continue;
+
+      // Recursive call to create the signature of the child statement. This
+      // will also create and store all clone groups in this child statement.
+      auto ChildSignature = generateSignatures(Child);
+
+      // Add the collected data to the signature of the current statement.
+      Signature.add(ChildSignature);
+
+      // If the current statement is a CompoundStatement, we need to store the
+      // signature for the generation of the sub-sequences.
+      if (CS)
+        ChildSignatures.push_back(ChildSignature);
+    }
+
+    // If the current statement is a CompoundStmt, we also need to create the
+    // clone groups from the sub-sequences inside the children.
+    if (CS)
+      handleSubSequences(CS, ChildSignatures);
+
+    // Save the signature for the current statement in the CloneDetector object.
+    CD.add(StmtSequence(S, Context), Signature);
+
+    return Signature;
+  }
+
+  /// \brief Adds all possible sub-sequences in the child array of the given
+  ///        CompoundStmt to the CloneDetector.
+  /// \param CS The given CompoundStmt.
+  /// \param ChildSignatures A list of calculated signatures for each child in
+  ///                        the given CompoundStmt.
+  void handleSubSequences(
+      const CompoundStmt *CS,
+      const std::vector<CloneDetector::CloneSignature> &ChildSignatures) {
+
+    // FIXME: This function has quadratic runtime right now. Check if skipping
+    // this function for too long CompoundStmts is an option.
+
+    // The length of the sub-sequence. We don't need to handle sequences with
+    // the length 1 as they are already handled in CollectData().
+    for (unsigned Length = 2; Length <= CS->size(); ++Length) {
+      // The start index in the body of the CompoundStmt. We increase the
+      // position until the end of the sub-sequence reaches the end of the
+      // CompoundStmt body.
+      for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) {
+        // Create an empty signature and add the signatures of all selected
+        // child statements to it.
+        CloneDetector::CloneSignature SubSignature;
+
+        for (unsigned i = Pos; i < Pos + Length; ++i) {
+          SubSignature.add(ChildSignatures[i]);
+        }
+
+        // Save the signature together with the information about what children
+        // sequence we selected.
+        CD.add(StmtSequence(CS, Context, Pos, Pos + Length), SubSignature);
+      }
+    }
+  }
+
+public:
+  explicit CloneSignatureGenerator(CloneDetector &CD, ASTContext &Context)
+      : CD(CD), Context(Context) {}
+
+  /// \brief Generates signatures for all statements in the given function body.
+  void consumeCodeBody(const Stmt *S) { generateSignatures(S); }
+};
+} // end anonymous namespace
+
+void CloneDetector::analyzeCodeBody(const Decl *D) {
+  assert(D);
+  assert(D->hasBody());
+  CloneSignatureGenerator Generator(*this, D->getASTContext());
+  Generator.consumeCodeBody(D->getBody());
+}
+
+void CloneDetector::add(const StmtSequence &S,
+                        const CloneSignature &Signature) {
+  // StringMap only works with StringRefs, so we create one for our data vector.
+  auto &Data = Signature.Data;
+  StringRef DataRef = StringRef(reinterpret_cast<const char *>(Data.data()),
+                                Data.size() * sizeof(unsigned));
+
+  // Search with the help of the signature if we already have encountered a
+  // clone of the given StmtSequence.
+  auto I = CloneGroupIndexes.find(DataRef);
+  if (I == CloneGroupIndexes.end()) {
+    // We haven't found an existing clone group, so we create a new clone group
+    // for this StmtSequence and store the index of it in our search map.
+    CloneGroupIndexes[DataRef] = CloneGroups.size();
+    CloneGroups.emplace_back(S, Signature.Complexity);
+    return;
+  }
+
+  // We have found an existing clone group and can expand it with the given
+  // StmtSequence.
+  CloneGroups[I->getValue()].Sequences.push_back(S);
+}
+
+namespace {
+/// \brief Returns true if and only if \p Stmt contains at least one other
+/// sequence in the \p Group.
+bool containsAnyInGroup(StmtSequence &Stmt,
+                            CloneDetector::CloneGroup &Group) {
+  for (StmtSequence &GroupStmt : Group.Sequences) {
+    if (Stmt.contains(GroupStmt))
+      return true;
+  }
+  return false;
+}
+
+/// \brief Returns true if and only if all sequences in \p OtherGroup are
+/// contained by a sequence in \p Group.
+bool containsGroup(CloneDetector::CloneGroup &Group,
+                   CloneDetector::CloneGroup &OtherGroup) {
+  // We have less sequences in the current group than we have in the other,
+  // so we will never fulfill the requirement for returning true. This is only
+  // possible because we know that a sequence in Group can contain at most
+  // one sequence in OtherGroup.
+  if (Group.Sequences.size() < OtherGroup.Sequences.size())
+    return false;
+
+  for (StmtSequence &Stmt : Group.Sequences) {
+    if (!containsAnyInGroup(Stmt, OtherGroup))
+      return false;
+  }
+  return true;
+}
+} // end anonymous namespace
+
+void CloneDetector::findClones(std::vector<CloneGroup> &Result,
+                               unsigned MinGroupComplexity) {
+  // Add every valid clone group that fulfills the complexity requirement.
+  for (const CloneGroup &Group : CloneGroups) {
+    if (Group.isValid() && Group.Complexity >= MinGroupComplexity) {
+      Result.push_back(Group);
+    }
+  }
+
+  std::vector<unsigned> IndexesToRemove;
+
+  // Compare every group in the result with the rest. If one groups contains
+  // another group, we only need to return the bigger group.
+  // Note: This doesn't scale well, so if possible avoid calling any heavy
+  // function from this loop to minimize the performance impact.
+  for (unsigned i = 0; i < Result.size(); ++i) {
+    for (unsigned j = 0; j < Result.size(); ++j) {
+      // Don't compare a group with itself.
+      if (i == j)
+        continue;
+
+      if (containsGroup(Result[j], Result[i])) {
+        IndexesToRemove.push_back(i);
+        break;
+      }
+    }
+  }
+
+  // Erasing a list of indexes from the vector should be done with decreasing
+  // indexes. As IndexesToRemove is constructed with increasing values, we just
+  // reverse iterate over it to get the desired order.
+  for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) {
+    Result.erase(Result.begin() + *I);
+  }
+}

Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt?rev=276782&r1=276781&r2=276782&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt Tue Jul 26 13:13:12 2016
@@ -22,6 +22,7 @@ add_clang_library(clangStaticAnalyzerChe
   CheckerDocumentation.cpp
   ChrootChecker.cpp
   ClangCheckers.cpp
+  CloneChecker.cpp
   CXXSelfAssignmentChecker.cpp
   DeadStoresChecker.cpp
   DebugCheckers.cpp

Added: cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp (added)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp Tue Jul 26 13:13:12 2016
@@ -0,0 +1,96 @@
+//===--- CloneChecker.cpp - Clone detection checker -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ClangSACheckers.h"
+#include "clang/Analysis/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker
+    : public Checker<check::ASTCodeBody, check::EndOfTranslationUnit> {
+  mutable CloneDetector CloneDetector;
+
+public:
+  void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
+                        BugReporter &BR) const;
+
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+                                 AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
+                                    BugReporter &BR) const {
+  // Every statement that should be included in the search for clones needs to
+  // be passed to the CloneDetector.
+  CloneDetector.analyzeCodeBody(D);
+}
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+                                             AnalysisManager &Mgr,
+                                             BugReporter &BR) const {
+  // At this point, every statement in the translation unit has been analyzed by
+  // the CloneDetector. The only thing left to do is to report the found clones.
+
+  int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger(
+      "MinimumCloneComplexity", 10, this);
+
+  assert(MinComplexity >= 0);
+
+  SourceManager &SM = BR.getSourceManager();
+
+  std::vector<CloneDetector::CloneGroup> CloneGroups;
+  CloneDetector.findClones(CloneGroups, MinComplexity);
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+                                               "Detected code clone.");
+
+  unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+                                               "Related code clone is here.");
+
+  for (CloneDetector::CloneGroup &Group : CloneGroups) {
+    // For readability reasons we sort the clones by line numbers.
+    std::sort(Group.Sequences.begin(), Group.Sequences.end(),
+              [&SM](const StmtSequence &LHS, const StmtSequence &RHS) {
+                return SM.isBeforeInTranslationUnit(LHS.getStartLoc(),
+                                                    RHS.getStartLoc()) &&
+                       SM.isBeforeInTranslationUnit(LHS.getEndLoc(),
+                                                    RHS.getEndLoc());
+              });
+
+    // We group the clones by printing the first as a warning and all others
+    // as a note.
+    DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID);
+    for (unsigned i = 1; i < Group.Sequences.size(); ++i) {
+      DiagEngine.Report(Group.Sequences[i].getStartLoc(), NoteID);
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Register CloneChecker
+//===----------------------------------------------------------------------===//
+
+void ento::registerCloneChecker(CheckerManager &Mgr) {
+  Mgr.registerChecker<CloneChecker>();
+}

Added: cfe/trunk/test/Analysis/copypaste/blocks.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/blocks.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/blocks.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/blocks.cpp Tue Jul 26 13:13:12 2016
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -analyze -fblocks -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in blocks.
+
+void log();
+
+auto BlockA = ^(int a, int b){ // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+    return a;
+  return b;
+};
+
+auto BlockB = ^(int a, int b){ // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+    return a;
+  return b;
+};

Added: cfe/trunk/test/Analysis/copypaste/false-positives.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/false-positives.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/false-positives.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/false-positives.cpp Tue Jul 26 13:13:12 2016
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This test contains false-positive reports from the CloneChecker that need to
+// be fixed.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+    return a;
+  return b;
+}
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+    return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+    return a;
+  return b;
+}

Added: cfe/trunk/test/Analysis/copypaste/functions.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/functions.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/functions.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/functions.cpp Tue Jul 26 13:13:12 2016
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in functions.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+    return a;
+  return b;
+}
+
+int maxClone(int x, int y) { // expected-note{{Related code clone is here.}}
+  log();
+  if (x > y)
+    return x;
+  return y;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}

Added: cfe/trunk/test/Analysis/copypaste/objc-methods.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/objc-methods.m?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/objc-methods.m (added)
+++ cfe/trunk/test/Analysis/copypaste/objc-methods.m Tue Jul 26 13:13:12 2016
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -Wno-objc-root-class -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in Objective-C methods.
+
+ at interface A
+- (int) setOk : (int) a : (int) b;
+ at end
+
+ at implementation A
+- (int) setOk : (int) a : (int) b {  // expected-warning{{Detected code clone.}}
+  if (a > b)
+    return a;
+  return b;
+}
+ at end
+
+ at interface B
+- (int) setOk : (int) a : (int) b;
+ at end
+
+ at implementation B
+- (int) setOk : (int) a : (int) b { // expected-note{{Related code clone is here.}}
+  if (a > b)
+    return a;
+  return b;
+}
+ at end

Added: cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp?rev=276782&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp (added)
+++ cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp Tue Jul 26 13:13:12 2016
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if sub-sequences can match with normal sequences.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+    return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+    return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}




More information about the cfe-commits mailing list