r311569 - [analyzer] Make StmtDataCollector customizable

Johannes Altmanninger via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 23 09:28:26 PDT 2017


Author: krobelus
Date: Wed Aug 23 09:28:26 2017
New Revision: 311569

URL: http://llvm.org/viewvc/llvm-project?rev=311569&view=rev
Log:
[analyzer] Make StmtDataCollector customizable

Summary:
This moves the data collection macro calls for Stmt nodes
to lib/AST/StmtDataCollectors.inc

Users can subclass ConstStmtVisitor and include StmtDataCollectors.inc
to define visitor methods for each Stmt subclass. This also makes it
possible to customize the visit methods as exemplified in
lib/Analysis/CloneDetection.cpp.

Move helper methods for data collection to a new module,
AST/DataCollection.

Add data collection for DeclRefExpr, MemberExpr and some literals.

Reviewers: arphaman, teemperor!

Subscribers: mgorny, xazax.hun, cfe-commits

Differential Revision: https://reviews.llvm.org/D36664

Added:
    cfe/trunk/include/clang/AST/DataCollection.h
    cfe/trunk/lib/AST/DataCollection.cpp
    cfe/trunk/lib/AST/StmtDataCollectors.inc
    cfe/trunk/unittests/AST/DataCollectionTest.cpp
Modified:
    cfe/trunk/include/clang/Analysis/CloneDetection.h
    cfe/trunk/lib/AST/CMakeLists.txt
    cfe/trunk/lib/Analysis/CloneDetection.cpp
    cfe/trunk/unittests/AST/CMakeLists.txt

Added: cfe/trunk/include/clang/AST/DataCollection.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/DataCollection.h?rev=311569&view=auto
==============================================================================
--- cfe/trunk/include/clang/AST/DataCollection.h (added)
+++ cfe/trunk/include/clang/AST/DataCollection.h Wed Aug 23 09:28:26 2017
@@ -0,0 +1,65 @@
+//===--- DataCollection.h ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// \brief This file declares helper methods for collecting data from AST nodes.
+///
+/// To collect data from Stmt nodes, subclass ConstStmtVisitor and include
+/// StmtDataCollectors.inc after defining the macros that you need. This
+/// provides data collection implementations for most Stmt kinds. Note
+/// that the included code requires some conditions to be met:
+///
+///   - There must be a method addData(const T &Data) that accepts strings,
+///     integral types as well as QualType. All data is forwarded to
+///     this method.
+///   - The ASTContext of the Stmt must be accessible by the name Context.
+///
+/// It is also possible to override individual visit methods. Have a look at
+/// the DataCollector in lib/Analysis/CloneDetection.cpp for a usage example.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_DATACOLLECTION_H
+#define LLVM_CLANG_AST_DATACOLLECTION_H
+
+#include "clang/AST/ASTContext.h"
+
+namespace clang {
+namespace data_collection {
+
+/// Returns a string that represents all macro expansions that expanded into the
+/// given SourceLocation.
+///
+/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations
+/// A and B are expanded from the same macros in the same order.
+std::string getMacroStack(SourceLocation Loc, ASTContext &Context);
+
+/// Utility functions for implementing addData() for a consumer that has a
+/// method update(StringRef).
+template <class T>
+void addDataToConsumer(T &DataConsumer, llvm::StringRef Str) {
+  DataConsumer.update(Str);
+}
+
+template <class T> void addDataToConsumer(T &DataConsumer, const QualType &QT) {
+  addDataToConsumer(DataConsumer, QT.getAsString());
+}
+
+template <class T, class Type>
+typename std::enable_if<
+    std::is_integral<Type>::value || std::is_enum<Type>::value ||
+    std::is_convertible<Type, size_t>::value // for llvm::hash_code
+    >::type
+addDataToConsumer(T &DataConsumer, Type Data) {
+  DataConsumer.update(StringRef(reinterpret_cast<char *>(&Data), sizeof(Data)));
+}
+
+} // end namespace data_collection
+} // end namespace clang
+
+#endif // LLVM_CLANG_AST_DATACOLLECTION_H

Modified: cfe/trunk/include/clang/Analysis/CloneDetection.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Analysis/CloneDetection.h?rev=311569&r1=311568&r2=311569&view=diff
==============================================================================
--- cfe/trunk/include/clang/Analysis/CloneDetection.h (original)
+++ cfe/trunk/include/clang/Analysis/CloneDetection.h Wed Aug 23 09:28:26 2017
@@ -15,11 +15,7 @@
 #ifndef LLVM_CLANG_AST_CLONEDETECTION_H
 #define LLVM_CLANG_AST_CLONEDETECTION_H
 
-#include "clang/AST/DeclTemplate.h"
 #include "clang/AST/StmtVisitor.h"
-#include "clang/Basic/SourceLocation.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Regex.h"
 #include <vector>
 
@@ -31,192 +27,6 @@ class VarDecl;
 class ASTContext;
 class CompoundStmt;
 
-namespace clone_detection {
-
-/// Returns a string that represents all macro expansions that expanded into the
-/// given SourceLocation.
-///
-/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations
-/// A and B are expanded from the same macros in the same order.
-std::string getMacroStack(SourceLocation Loc, ASTContext &Context);
-
-/// Collects the data of a single Stmt.
-///
-/// This class defines what a code clone is: If it collects for two statements
-/// the same data, then those two statements are considered to be clones of each
-/// other.
-///
-/// All collected data is forwarded to the given data consumer of the type T.
-/// The data consumer class needs to provide a member method with the signature:
-///   update(StringRef Str)
-template <typename T>
-class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector<T>> {
-
-  ASTContext &Context;
-  /// The data sink to which all data is forwarded.
-  T &DataConsumer;
-
-public:
-  /// Collects data of the given Stmt.
-  /// \param S The given statement.
-  /// \param Context The ASTContext of S.
-  /// \param DataConsumer The data sink to which all data is forwarded.
-  StmtDataCollector(const Stmt *S, ASTContext &Context, T &DataConsumer)
-      : Context(Context), DataConsumer(DataConsumer) {
-    this->Visit(S);
-  }
-
-  typedef unsigned DataPiece;
-
-  // Below are utility methods for appending different data to the vector.
-
-  void addData(DataPiece Integer) {
-    DataConsumer.update(
-        StringRef(reinterpret_cast<char *>(&Integer), sizeof(Integer)));
-  }
-
-  void addData(llvm::StringRef Str) { DataConsumer.update(Str); }
-
-  void addData(const QualType &QT) { addData(QT.getAsString()); }
-
-// The functions below collect the class specific data of each Stmt subclass.
-
-// Utility macro for defining a visit method for a given class. This method
-// calls back to the ConstStmtVisitor to visit all parent classes.
-#define DEF_ADD_DATA(CLASS, CODE)                                              \
-  void Visit##CLASS(const CLASS *S) {                                          \
-    CODE;                                                                      \
-    ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S);                      \
-  }
-
-  DEF_ADD_DATA(Stmt, {
-    addData(S->getStmtClass());
-    // This ensures that macro generated code isn't identical to macro-generated
-    // code.
-    addData(getMacroStack(S->getLocStart(), Context));
-    addData(getMacroStack(S->getLocEnd(), Context));
-  })
-  DEF_ADD_DATA(Expr, { addData(S->getType()); })
-
-  //--- Builtin functionality ----------------------------------------------//
-  DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
-  DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
-  DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
-  DEF_ADD_DATA(TypeTraitExpr, {
-    addData(S->getTrait());
-    for (unsigned i = 0; i < S->getNumArgs(); ++i)
-      addData(S->getArg(i)->getType());
-  })
-
-  //--- Calls --------------------------------------------------------------//
-  DEF_ADD_DATA(CallExpr, {
-    // Function pointers don't have a callee and we just skip hashing it.
-    if (const FunctionDecl *D = S->getDirectCallee()) {
-      // If the function is a template specialization, we also need to handle
-      // the template arguments as they are not included in the qualified name.
-      if (auto Args = D->getTemplateSpecializationArgs()) {
-        std::string ArgString;
-
-        // Print all template arguments into ArgString
-        llvm::raw_string_ostream OS(ArgString);
-        for (unsigned i = 0; i < Args->size(); ++i) {
-          Args->get(i).print(Context.getLangOpts(), OS);
-          // Add a padding character so that 'foo<X, XX>()' != 'foo<XX, X>()'.
-          OS << '\n';
-        }
-        OS.flush();
-
-        addData(ArgString);
-      }
-      addData(D->getQualifiedNameAsString());
-    }
-  })
-
-  //--- Exceptions ---------------------------------------------------------//
-  DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); })
-
-  //--- C++ OOP Stmts ------------------------------------------------------//
-  DEF_ADD_DATA(CXXDeleteExpr, {
-    addData(S->isArrayFormAsWritten());
-    addData(S->isGlobalDelete());
-  })
-
-  //--- Casts --------------------------------------------------------------//
-  DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); })
-
-  //--- Miscellaneous Exprs ------------------------------------------------//
-  DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); })
-  DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); })
-
-  //--- Control flow -------------------------------------------------------//
-  DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); })
-  DEF_ADD_DATA(IndirectGotoStmt, {
-    if (S->getConstantTarget())
-      addData(S->getConstantTarget()->getName());
-  })
-  DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); })
-  DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); })
-  DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); })
-
-  //--- Objective-C --------------------------------------------------------//
-  DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); })
-  DEF_ADD_DATA(ObjCPropertyRefExpr, {
-    addData(S->isSuperReceiver());
-    addData(S->isImplicitProperty());
-  })
-  DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); })
-
-  //--- Miscellaneous Stmts ------------------------------------------------//
-  DEF_ADD_DATA(CXXFoldExpr, {
-    addData(S->isRightFold());
-    addData(S->getOperator());
-  })
-  DEF_ADD_DATA(GenericSelectionExpr, {
-    for (unsigned i = 0; i < S->getNumAssocs(); ++i) {
-      addData(S->getAssocType(i));
-    }
-  })
-  DEF_ADD_DATA(LambdaExpr, {
-    for (const LambdaCapture &C : S->captures()) {
-      addData(C.isPackExpansion());
-      addData(C.getCaptureKind());
-      if (C.capturesVariable())
-        addData(C.getCapturedVar()->getType());
-    }
-    addData(S->isGenericLambda());
-    addData(S->isMutable());
-  })
-  DEF_ADD_DATA(DeclStmt, {
-    auto numDecls = std::distance(S->decl_begin(), S->decl_end());
-    addData(static_cast<DataPiece>(numDecls));
-    for (const Decl *D : S->decls()) {
-      if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
-        addData(VD->getType());
-      }
-    }
-  })
-  DEF_ADD_DATA(AsmStmt, {
-    addData(S->isSimple());
-    addData(S->isVolatile());
-    addData(S->generateAsmString(Context));
-    for (unsigned i = 0; i < S->getNumInputs(); ++i) {
-      addData(S->getInputConstraint(i));
-    }
-    for (unsigned i = 0; i < S->getNumOutputs(); ++i) {
-      addData(S->getOutputConstraint(i));
-    }
-    for (unsigned i = 0; i < S->getNumClobbers(); ++i) {
-      addData(S->getClobber(i));
-    }
-  })
-  DEF_ADD_DATA(AttributedStmt, {
-    for (const Attr *A : S->getAttrs()) {
-      addData(std::string(A->getSpelling()));
-    }
-  })
-};
-} // namespace clone_detection
-
 /// Identifies a list of statements.
 ///
 /// Can either identify a single arbitrary Stmt object, a continuous sequence of

Modified: cfe/trunk/lib/AST/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CMakeLists.txt?rev=311569&r1=311568&r2=311569&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CMakeLists.txt (original)
+++ cfe/trunk/lib/AST/CMakeLists.txt Wed Aug 23 09:28:26 2017
@@ -20,6 +20,7 @@ add_clang_library(clangAST
   CommentLexer.cpp
   CommentParser.cpp
   CommentSema.cpp
+  DataCollection.cpp
   Decl.cpp
   DeclarationName.cpp
   DeclBase.cpp

Added: cfe/trunk/lib/AST/DataCollection.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/DataCollection.cpp?rev=311569&view=auto
==============================================================================
--- cfe/trunk/lib/AST/DataCollection.cpp (added)
+++ cfe/trunk/lib/AST/DataCollection.cpp Wed Aug 23 09:28:26 2017
@@ -0,0 +1,50 @@
+//===-- DataCollection.cpp --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/DataCollection.h"
+
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace data_collection {
+
+/// Prints the macro name that contains the given SourceLocation into the given
+/// raw_string_ostream.
+static void printMacroName(llvm::raw_string_ostream &MacroStack,
+                           ASTContext &Context, SourceLocation Loc) {
+  MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(),
+                                             Context.getLangOpts());
+
+  // Add a trailing space as padding so that consecutive macro names
+  // are not concatenated with each other.
+  MacroStack << " ";
+}
+
+/// Returns a string that represents all macro expansions that expanded into the
+/// given SourceLocation.
+///
+/// If 'getMacroStack(A) == getMacroStack(B)' is true, then the SourceLocations
+/// A and B are expanded from the same macros in the same order.
+std::string getMacroStack(SourceLocation Loc, ASTContext &Context) {
+  std::string MacroStack;
+  llvm::raw_string_ostream MacroStackStream(MacroStack);
+  SourceManager &SM = Context.getSourceManager();
+
+  // Iterate over all macros that expanded into the given SourceLocation.
+  while (Loc.isMacroID()) {
+    // Add the macro name to the stream.
+    printMacroName(MacroStackStream, Context, Loc);
+    Loc = SM.getImmediateMacroCallerLoc(Loc);
+  }
+  MacroStackStream.flush();
+  return MacroStack;
+}
+
+} // end namespace data_collection
+} // end namespace clang

Added: cfe/trunk/lib/AST/StmtDataCollectors.inc
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtDataCollectors.inc?rev=311569&view=auto
==============================================================================
--- cfe/trunk/lib/AST/StmtDataCollectors.inc (added)
+++ cfe/trunk/lib/AST/StmtDataCollectors.inc Wed Aug 23 09:28:26 2017
@@ -0,0 +1,141 @@
+// The functions below collect the class specific data of each Stmt subclass.
+
+DEF_ADD_DATA(Stmt, {
+  addData(S->getStmtClass());
+  // This ensures that non-macro-generated code isn't identical to
+  // macro-generated code.
+  addData(data_collection::getMacroStack(S->getLocStart(), Context));
+  addData(data_collection::getMacroStack(S->getLocEnd(), Context));
+})
+DEF_ADD_DATA(Expr, { addData(S->getType()); })
+
+//--- Builtin functionality ----------------------------------------------//
+DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
+DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
+DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
+DEF_ADD_DATA(TypeTraitExpr, {
+  addData(S->getTrait());
+  for (unsigned i = 0; i < S->getNumArgs(); ++i)
+    addData(S->getArg(i)->getType());
+})
+
+//--- Calls --------------------------------------------------------------//
+DEF_ADD_DATA(CallExpr, {
+  // Function pointers don't have a callee and we just skip hashing it.
+  if (const FunctionDecl *D = S->getDirectCallee()) {
+    // If the function is a template specialization, we also need to handle
+    // the template arguments as they are not included in the qualified name.
+    if (auto Args = D->getTemplateSpecializationArgs()) {
+      std::string ArgString;
+
+      // Print all template arguments into ArgString
+      llvm::raw_string_ostream OS(ArgString);
+      for (unsigned i = 0; i < Args->size(); ++i) {
+        Args->get(i).print(Context.getLangOpts(), OS);
+        // Add a padding character so that 'foo<X, XX>()' != 'foo<XX, X>()'.
+        OS << '\n';
+      }
+      OS.flush();
+
+      addData(ArgString);
+    }
+    addData(D->getQualifiedNameAsString());
+  }
+})
+
+//--- Value references ---------------------------------------------------//
+DEF_ADD_DATA(DeclRefExpr,
+             { addData(S->getDecl()->getQualifiedNameAsString()); })
+DEF_ADD_DATA(MemberExpr,
+             { addData(S->getMemberDecl()->getName()); })
+
+//--- Literals -----------------------------------------------------------//
+DEF_ADD_DATA(IntegerLiteral, { addData(llvm::hash_value(S->getValue())); })
+DEF_ADD_DATA(FloatingLiteral, { addData(llvm::hash_value(S->getValue())); })
+DEF_ADD_DATA(StringLiteral, { addData(S->getString()); })
+DEF_ADD_DATA(CXXBoolLiteralExpr, { addData(S->getValue()); })
+DEF_ADD_DATA(CharacterLiteral, { addData(S->getValue()); })
+
+//--- Exceptions ---------------------------------------------------------//
+DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); })
+
+//--- C++ OOP Stmts ------------------------------------------------------//
+DEF_ADD_DATA(CXXDeleteExpr, {
+  addData(S->isArrayFormAsWritten());
+  addData(S->isGlobalDelete());
+})
+
+//--- Casts --------------------------------------------------------------//
+DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); })
+
+//--- Miscellaneous Exprs ------------------------------------------------//
+DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); })
+DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); })
+
+//--- Control flow -------------------------------------------------------//
+DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); })
+DEF_ADD_DATA(IndirectGotoStmt, {
+  if (S->getConstantTarget())
+    addData(S->getConstantTarget()->getName());
+})
+DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); })
+DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); })
+DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); })
+
+//--- Objective-C --------------------------------------------------------//
+DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); })
+DEF_ADD_DATA(ObjCPropertyRefExpr, {
+  addData(S->isSuperReceiver());
+  addData(S->isImplicitProperty());
+})
+DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); })
+
+//--- Miscellaneous Stmts ------------------------------------------------//
+DEF_ADD_DATA(CXXFoldExpr, {
+  addData(S->isRightFold());
+  addData(S->getOperator());
+})
+DEF_ADD_DATA(GenericSelectionExpr, {
+  for (unsigned i = 0; i < S->getNumAssocs(); ++i) {
+    addData(S->getAssocType(i));
+  }
+})
+DEF_ADD_DATA(LambdaExpr, {
+  for (const LambdaCapture &C : S->captures()) {
+    addData(C.isPackExpansion());
+    addData(C.getCaptureKind());
+    if (C.capturesVariable())
+      addData(C.getCapturedVar()->getType());
+  }
+  addData(S->isGenericLambda());
+  addData(S->isMutable());
+})
+DEF_ADD_DATA(DeclStmt, {
+  auto numDecls = std::distance(S->decl_begin(), S->decl_end());
+  addData(static_cast<unsigned>(numDecls));
+  for (const Decl *D : S->decls()) {
+    if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
+      addData(VD->getType());
+    }
+  }
+})
+DEF_ADD_DATA(AsmStmt, {
+  addData(S->isSimple());
+  addData(S->isVolatile());
+  addData(S->generateAsmString(Context));
+  for (unsigned i = 0; i < S->getNumInputs(); ++i) {
+    addData(S->getInputConstraint(i));
+  }
+  for (unsigned i = 0; i < S->getNumOutputs(); ++i) {
+    addData(S->getOutputConstraint(i));
+  }
+  for (unsigned i = 0; i < S->getNumClobbers(); ++i) {
+    addData(S->getClobber(i));
+  }
+})
+DEF_ADD_DATA(AttributedStmt, {
+  for (const Attr *A : S->getAttrs()) {
+    addData(std::string(A->getSpelling()));
+  }
+})
+#undef DEF_ADD_DATA

Modified: cfe/trunk/lib/Analysis/CloneDetection.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Analysis/CloneDetection.cpp?rev=311569&r1=311568&r2=311569&view=diff
==============================================================================
--- cfe/trunk/lib/Analysis/CloneDetection.cpp (original)
+++ cfe/trunk/lib/Analysis/CloneDetection.cpp Wed Aug 23 09:28:26 2017
@@ -13,16 +13,12 @@
 
 #include "clang/Analysis/CloneDetection.h"
 
-#include "clang/AST/ASTContext.h"
-#include "clang/AST/RecursiveASTVisitor.h"
-#include "clang/AST/Stmt.h"
-#include "clang/Lex/Lexer.h"
+#include "clang/AST/DataCollection.h"
+#include "clang/AST/DeclTemplate.h"
 #include "llvm/Support/MD5.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Path.h"
 
 using namespace clang;
-using namespace clang::clone_detection;
 
 StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D,
                            unsigned StartIndex, unsigned EndIndex)
@@ -91,34 +87,6 @@ SourceRange StmtSequence::getSourceRange
   return SourceRange(getStartLoc(), getEndLoc());
 }
 
-/// Prints the macro name that contains the given SourceLocation into the given
-/// raw_string_ostream.
-static void printMacroName(llvm::raw_string_ostream &MacroStack,
-                           ASTContext &Context, SourceLocation Loc) {
-  MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(),
-                                             Context.getLangOpts());
-
-  // Add an empty space at the end as a padding to prevent
-  // that macro names concatenate to the names of other macros.
-  MacroStack << " ";
-}
-
-std::string clone_detection::getMacroStack(SourceLocation Loc,
-                                           ASTContext &Context) {
-  std::string MacroStack;
-  llvm::raw_string_ostream MacroStackStream(MacroStack);
-  SourceManager &SM = Context.getSourceManager();
-
-  // Iterate over all macros that expanded into the given SourceLocation.
-  while (Loc.isMacroID()) {
-    // Add the macro name to the stream.
-    printMacroName(MacroStackStream, Context, Loc);
-    Loc = SM.getImmediateMacroCallerLoc(Loc);
-  }
-  MacroStackStream.flush();
-  return MacroStack;
-}
-
 void CloneDetector::analyzeCodeBody(const Decl *D) {
   assert(D);
   assert(D->hasBody());
@@ -184,16 +152,17 @@ void OnlyLargestCloneConstraint::constra
   }
 }
 
-bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup &Group) {
+bool FilenamePatternConstraint::isAutoGenerated(
+    const CloneDetector::CloneGroup &Group) {
   std::string Error;
-  if (IgnoredFilesPattern.empty() || Group.empty() || 
+  if (IgnoredFilesPattern.empty() || Group.empty() ||
       !IgnoredFilesRegex->isValid(Error))
     return false;
 
   for (const StmtSequence &S : Group) {
     const SourceManager &SM = S.getASTContext().getSourceManager();
-    StringRef Filename = llvm::sys::path::filename(SM.getFilename(
-        S.getContainingDecl()->getLocation()));
+    StringRef Filename = llvm::sys::path::filename(
+        SM.getFilename(S.getContainingDecl()->getLocation()));
     if (IgnoredFilesRegex->match(Filename))
       return true;
   }
@@ -201,6 +170,59 @@ bool FilenamePatternConstraint::isAutoGe
   return false;
 }
 
+/// This class defines what a type II code clone is: If it collects for two
+/// statements the same data, then those two statements are considered to be
+/// clones of each other.
+///
+/// All collected data is forwarded to the given data consumer of the type T.
+/// The data consumer class needs to provide a member method with the signature:
+///   update(StringRef Str)
+namespace {
+template <class T>
+class CloneTypeIIStmtDataCollector
+    : public ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>> {
+  ASTContext &Context;
+  /// The data sink to which all data is forwarded.
+  T &DataConsumer;
+
+  template <class Ty> void addData(const Ty &Data) {
+    data_collection::addDataToConsumer(DataConsumer, Data);
+  }
+
+public:
+  CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context,
+                               T &DataConsumer)
+      : Context(Context), DataConsumer(DataConsumer) {
+    this->Visit(S);
+  }
+
+// Define a visit method for each class to collect data and subsequently visit
+// all parent classes. This uses a template so that custom visit methods by us
+// take precedence.
+#define DEF_ADD_DATA(CLASS, CODE)                                              \
+  template <class = void> void Visit##CLASS(const CLASS *S) {                  \
+    CODE;                                                                      \
+    ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S);        \
+  }
+
+#include "../AST/StmtDataCollectors.inc"
+
+// Type II clones ignore variable names and literals, so let's skip them.
+#define SKIP(CLASS)                                                            \
+  void Visit##CLASS(const CLASS *S) {                                          \
+    ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S);        \
+  }
+  SKIP(DeclRefExpr)
+  SKIP(MemberExpr)
+  SKIP(IntegerLiteral)
+  SKIP(FloatingLiteral)
+  SKIP(StringLiteral)
+  SKIP(CXXBoolLiteralExpr)
+  SKIP(CharacterLiteral)
+#undef SKIP
+};
+} // end anonymous namespace
+
 static size_t createHash(llvm::MD5 &Hash) {
   size_t HashCode;
 
@@ -222,7 +244,7 @@ size_t RecursiveCloneTypeIIConstraint::s
   llvm::MD5 Hash;
   ASTContext &Context = D->getASTContext();
 
-  StmtDataCollector<llvm::MD5>(S, Context, Hash);
+  CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash);
 
   auto CS = dyn_cast<CompoundStmt>(S);
   SmallVector<size_t, 8> ChildHashes;
@@ -288,8 +310,8 @@ public:
 static void CollectStmtSequenceData(const StmtSequence &Sequence,
                                     FoldingSetNodeIDWrapper &OutputData) {
   for (const Stmt *S : Sequence) {
-    StmtDataCollector<FoldingSetNodeIDWrapper>(S, Sequence.getASTContext(),
-                                               OutputData);
+    CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>(
+        S, Sequence.getASTContext(), OutputData);
 
     for (const Stmt *Child : S->children()) {
       if (!Child)
@@ -339,7 +361,7 @@ void RecursiveCloneTypeIIConstraint::con
     // Sort hash_codes in StmtsByHash.
     std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(),
                      [](std::pair<size_t, StmtSequence> LHS,
-                            std::pair<size_t, StmtSequence> RHS) {
+                        std::pair<size_t, StmtSequence> RHS) {
                        return LHS.first < RHS.first;
                      });
 
@@ -393,8 +415,10 @@ size_t MinComplexityConstraint::calculat
   ASTContext &Context = Seq.getASTContext();
 
   // Look up what macros expanded into the current statement.
-  std::string StartMacroStack = getMacroStack(Seq.getStartLoc(), Context);
-  std::string EndMacroStack = getMacroStack(Seq.getEndLoc(), Context);
+  std::string StartMacroStack =
+      data_collection::getMacroStack(Seq.getStartLoc(), Context);
+  std::string EndMacroStack =
+      data_collection::getMacroStack(Seq.getEndLoc(), Context);
 
   // First, check if ParentMacroStack is not empty which means we are currently
   // dealing with a parent statement which was expanded from a macro.

Modified: cfe/trunk/unittests/AST/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/CMakeLists.txt?rev=311569&r1=311568&r2=311569&view=diff
==============================================================================
--- cfe/trunk/unittests/AST/CMakeLists.txt (original)
+++ cfe/trunk/unittests/AST/CMakeLists.txt Wed Aug 23 09:28:26 2017
@@ -9,6 +9,7 @@ add_clang_unittest(ASTTests
   ASTVectorTest.cpp
   CommentLexer.cpp
   CommentParser.cpp
+  DataCollectionTest.cpp
   DeclPrinterTest.cpp
   DeclTest.cpp
   EvaluateAsRValueTest.cpp

Added: cfe/trunk/unittests/AST/DataCollectionTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/DataCollectionTest.cpp?rev=311569&view=auto
==============================================================================
--- cfe/trunk/unittests/AST/DataCollectionTest.cpp (added)
+++ cfe/trunk/unittests/AST/DataCollectionTest.cpp Wed Aug 23 09:28:26 2017
@@ -0,0 +1,173 @@
+//===- unittests/AST/DataCollectionTest.cpp -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains tests for the DataCollection module.
+//
+// Each test hashes the data collected for the one statement matched in each of
+// two code snippets and asserts the hashes match iff the nodes count as equal.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/DataCollection.h"
+#include "clang/AST/DeclTemplate.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Tooling/Tooling.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+using namespace tooling;
+using namespace ast_matchers;
+
+namespace {
+class StmtDataCollector : public ConstStmtVisitor<StmtDataCollector> {
+  ASTContext &Context;
+  llvm::MD5 &DataConsumer;
+
+  template <class T> void addData(const T &Data) {
+    data_collection::addDataToConsumer(DataConsumer, Data);
+  }
+
+public:
+  StmtDataCollector(const Stmt *S, ASTContext &Context, llvm::MD5 &DataConsumer)
+      : Context(Context), DataConsumer(DataConsumer) {
+    this->Visit(S);
+  }
+
+#define DEF_ADD_DATA(CLASS, CODE)                                              \
+  template <class Dummy = void> Dummy Visit##CLASS(const CLASS *S) {           \
+    CODE;                                                                      \
+    ConstStmtVisitor<StmtDataCollector>::Visit##CLASS(S);                      \
+  }
+
+#include "../../lib/AST/StmtDataCollectors.inc"
+};
+} // end anonymous namespace
+
+namespace {
+struct StmtHashMatch : public MatchFinder::MatchCallback {
+  unsigned NumFound;
+  llvm::MD5::MD5Result &Hash;
+  StmtHashMatch(llvm::MD5::MD5Result &Hash) : NumFound(0), Hash(Hash) {}
+
+  void run(const MatchFinder::MatchResult &Result) override {
+    const Stmt *S = Result.Nodes.getNodeAs<Stmt>("id");
+    if (!S)
+      return;
+    ++NumFound;
+    if (NumFound > 1)
+      return;
+    llvm::MD5 MD5;
+    StmtDataCollector(S, *Result.Context, MD5);
+    MD5.final(Hash);
+  }
+};
+} // end anonymous namespace
+
+static testing::AssertionResult hashStmt(llvm::MD5::MD5Result &Hash,
+                                         const StatementMatcher &StmtMatch,
+                                         StringRef Code) {
+  StmtHashMatch Hasher(Hash);
+  MatchFinder Finder;
+  Finder.addMatcher(StmtMatch, &Hasher);
+  std::unique_ptr<FrontendActionFactory> Factory(
+      newFrontendActionFactory(&Finder));
+  if (!runToolOnCode(Factory->create(), Code))
+    return testing::AssertionFailure()
+           << "Parsing error in \"" << Code.str() << "\"";
+  if (Hasher.NumFound == 0)
+    return testing::AssertionFailure() << "Matcher didn't find any statements";
+  if (Hasher.NumFound > 1)
+    return testing::AssertionFailure()
+           << "Matcher should match only one statement "
+              "(found "
+           << Hasher.NumFound << ")";
+  return testing::AssertionSuccess();
+}
+
+static testing::AssertionResult
+isStmtHashEqual(const StatementMatcher &StmtMatch, StringRef Code1,
+                StringRef Code2) {
+  llvm::MD5::MD5Result Hash1, Hash2;
+  testing::AssertionResult Result = hashStmt(Hash1, StmtMatch, Code1);
+  if (!Result)
+    return Result;
+  if (!(Result = hashStmt(Hash2, StmtMatch, Code2)))
+    return Result;
+
+  return testing::AssertionResult(Hash1 == Hash2);
+}
+
+TEST(StmtDataCollector, TestDeclRefExpr) {
+  ASSERT_TRUE(isStmtHashEqual(declRefExpr().bind("id"), "int x, r = x;",
+                              "int x, r = x;"));
+  ASSERT_FALSE(isStmtHashEqual(declRefExpr().bind("id"), "int x, r = x;",
+                               "int y, r = y;"));
+  ASSERT_FALSE(isStmtHashEqual(declRefExpr().bind("id"), "int x, r = x;",
+                               "namespace n { int x, r = x; };"));
+}
+
+TEST(StmtDataCollector, TestMemberExpr) {
+  ASSERT_TRUE(isStmtHashEqual(memberExpr().bind("id"),
+                              "struct { int x; } X; int r = X.x;",
+                              "struct { int x; } X; int r = (&X)->x;"));
+  ASSERT_TRUE(isStmtHashEqual(memberExpr().bind("id"),
+                              "struct { int x; } X; int r = X.x;",
+                              "struct { int x; } Y; int r = Y.x;"));
+  ASSERT_TRUE(isStmtHashEqual(memberExpr().bind("id"),
+                              "struct { int x; } X; int r = X.x;",
+                              "struct C { int x; } X; int r = X.C::x;"));
+  ASSERT_FALSE(isStmtHashEqual(memberExpr().bind("id"),
+                               "struct { int x; } X; int r = X.x;",
+                               "struct { int y; } X; int r = X.y;"));
+}
+
+TEST(StmtDataCollector, TestIntegerLiteral) {
+  ASSERT_TRUE(
+      isStmtHashEqual(integerLiteral().bind("id"), "int x = 0;", "int x = 0;"));
+  ASSERT_TRUE(
+      isStmtHashEqual(integerLiteral().bind("id"), "int x = 0;", "int x =00;"));
+  ASSERT_FALSE(
+      isStmtHashEqual(integerLiteral().bind("id"), "int x = 0;", "int x = 1;"));
+}
+
+TEST(StmtDataCollector, TestFloatingLiteral) {
+  ASSERT_TRUE(isStmtHashEqual(floatLiteral().bind("id"), "double x = .0;",
+                              "double x = .0;"));
+  ASSERT_TRUE(isStmtHashEqual(floatLiteral().bind("id"), "double x = .10;",
+                              "double x = .1;"));
+  ASSERT_TRUE(isStmtHashEqual(floatLiteral().bind("id"), "double x = .1;",
+                              "double x = 1e-1;"));
+  ASSERT_FALSE(isStmtHashEqual(floatLiteral().bind("id"), "double x = .0;",
+                               "double x = .1;"));
+}
+
+TEST(StmtDataCollector, TestStringLiteral) {
+  ASSERT_TRUE(isStmtHashEqual(stringLiteral().bind("id"), R"(char x[] = "0";)",
+                              R"(char x[] = "0";)"));
+  ASSERT_FALSE(isStmtHashEqual(stringLiteral().bind("id"), R"(char x[] = "0";)",
+                               R"(char x[] = "1";)"));
+}
+
+TEST(StmtDataCollector, TestCXXBoolLiteral) {
+  ASSERT_TRUE(isStmtHashEqual(cxxBoolLiteral().bind("id"), "bool x = false;",
+                              "bool x = false;"));
+  ASSERT_FALSE(isStmtHashEqual(cxxBoolLiteral().bind("id"), "bool x = false;",
+                               "bool x = true;"));
+}
+
+TEST(StmtDataCollector, TestCharacterLiteral) {
+  ASSERT_TRUE(isStmtHashEqual(characterLiteral().bind("id"), "char x = '0';",
+                              "char x = '0';"));
+  ASSERT_TRUE(isStmtHashEqual(characterLiteral().bind("id"),
+                              R"(char x = '\0';)",
+                              R"(char x = '\x00';)"));
+  ASSERT_FALSE(isStmtHashEqual(characterLiteral().bind("id"), "char x = '0';",
+                               "char x = '1';"));
+}




More information about the cfe-commits mailing list