[cfe-commits] [PATCH] call graph building

Wed Jul 15 14:36:46 PDT 2009

I think this looks good, especially for an initial patch.  We  
obviously will refine it over time.

One nit:

   llvm::cl::ParseCommandLineOptions(argc, argv, "clang analyzer");

should be:

   llvm::cl::ParseCommandLineOptions(argc, argv, "clang-wpa");

On Jul 14, 2009, at 10:03 PM, Zhongxing Xu wrote:

> New files attached. To print decl names, a new mapping from caller's
> nodes to their ASTContexts is added.
>
> On Wed, Jul 15, 2009 at 8:33 AM, Ted Kremenek<kremenek at apple.com>  
> wrote:
>> On Jul 13, 2009, at 6:31 AM, Zhongxing Xu wrote:
>>
>> This is an initial implementation of call graph building based on pch
>> reader and the new Program/Entity facility. It is very primitive but
>> functional.
>> Put the files under clang/tools/wpa (meaning 'whole program
>> analysis'). After 'make', a new command 'clang-analyze' will be
>> generated. It takes a list of .ast files and builds a call graph over
>> them.
>>
>> Hi Zhongxing,
>> I would call the program 'clang-callgraph' so that people don't  
>> confuse it
>> with the "static analyzer".  Comments inline.
>>
>> #include "CallGraph.h"
>> #include "clang/AST/ASTContext.h"
>> #include "clang/AST/StmtVisitor.h"
>> using namespace clang;
>> using namespace idx;
>> CallGraph *CallGraph::G = 0;
>> namespace {
>> class CGBuilder : public StmtVisitor<CGBuilder> {
>>   CallGraph &G;
>>   FunctionDecl *FD;
>> public:
>>   CGBuilder(CallGraph &g, FunctionDecl *fd)
>>     : G(g), FD(fd) {}
>>   void VisitCompoundStmt(CompoundStmt *S) {
>>     VisitChildren(S);
>>   }
>>   void VisitCallExpr(CallExpr *CE);
>>   void VisitChildren(Stmt *S) {
>>     for (Stmt::child_iterator I=S->child_begin(), E=S->child_end();  
>> I !=
>> E;++I)
>>       if (*I)
>>         static_cast<CGBuilder*>(this)->Visit(*I);
>>   }
>> };
>> }
>>
>> You can make this a little shorter by using 'CFGRecStmtVisitor'  
>> instead of
>> StmtVisitor.  It basically does the recursion for you.
>
> I did look at 'CFGRecStmtVisitor'. Call graph building does not need
> CFG building. I think a plain StmtVisitor is enough.
>
>>
>> void CGBuilder::VisitCallExpr(CallExpr *CE) {
>>   const Entity *CallerEnt = Entity::get(FD, G.getProgram());
>>
>> Seems like you can make things a little faster by making  
>> 'CallerEnt' an
>> instance variable that is initialized to NULL, and then lazily  
>> initialize it
>> here if it is NULL.  This will speed things up when there are no  
>> calls.
>>
>>   CallGraphNode *Node = G.getOrInsertFunction(CallerEnt);
>>
>> This too can probably be memoized, since it seems that a CGBuilder  
>> is built
>> on a per-function basis.
>>
>>   Expr *Callee = CE->getCallee();
>>   if (CastExpr *CE = dyn_cast<CastExpr>(Callee))
>>     Callee = CE->getSubExpr();
>>   if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Callee)) {
>>     Decl *D = DRE->getDecl();
>>     if (FunctionDecl *CalleeDecl = dyn_cast<FunctionDecl>(D)) {
>>       const Entity *Ent = Entity::get(CalleeDecl, G.getProgram());
>>       CallGraphNode *CalleeNode = G.getOrInsertFunction(Ent);
>>       Node->addCallee(ASTLocation(FD, CE), CalleeNode);
>>     }
>>   }
>> }
>>
>> Seems reasonable.
>>
>> void CallGraph::addTU(const ASTUnit &AST) {
>>   if (!G)
>>     G = new CallGraph();
>>   const ASTContext &Ctx = AST.getASTContext();
>>   DeclContext *DC = Ctx.getTranslationUnitDecl();
>>   for (DeclContext::decl_iterator I = DC->decls_begin(), E =
>> DC->decls_end();
>>        I != E; ++I) {
>>     if (FunctionDecl *FD = dyn_cast<FunctionDecl>(*I)) {
>>       if (FD->isThisDeclarationADefinition()) {
>>         CGBuilder builder(*G, FD);
>>         builder.Visit(FD->getBody());
>>       }
>>     }
>>   }
>> }
>>
>> Seems reasonable.  I'm not certain if this will handle code buried  
>> in C++
>> namespaces, but this implementation right now doesn't handle C++  
>> anyway (so
>> it can be done later).
>>
>> CallGraphNode *CallGraph::getOrInsertFunction(const Entity *F) {
>>   CallGraphNode *&Node = FunctionMap[F];
>>   if (Node)
>>     return Node;
>>   return Node = new CallGraphNode(F);
>> }
>>
>> Seems reasonable.
>>
>> #ifndef LLVM_CLANG_ANALYSIS_CALLGRAPH
>> #define LLVM_CLANG_ANALYSIS_CALLGRAPH
>> #include "clang/Index/ASTLocation.h"
>> #include "clang/Index/Entity.h"
>> #include "clang/Index/Program.h"
>> #include "clang/Frontend/ASTUnit.h"
>> #include <vector>
>> #include <map>
>> namespace clang {
>> class CallGraphNode {
>>   const idx::Entity *F;
>>   typedef std::pair<idx::ASTLocation, CallGraphNode*> CallRecord;
>>   std::vector<CallRecord> CalledFunctions;
>> public:
>>   CallGraphNode(const idx::Entity *f) : F(f) {}
>>   void addCallee(idx::ASTLocation L, CallGraphNode *Node) {
>>     CalledFunctions.push_back(std::make_pair(L, Node));
>>   }
>> };
>> class CallGraph {
>>   static CallGraph *G;
>>
>>   idx::Program Prog;
>>   typedef std::map<const idx::Entity *, CallGraphNode *>  
>> FunctionMapTy;
>>
>> Eventually, if you care about speed, you'll probably want to use a  
>> DenseMap.
>>  I don't see you using the sortedness property of std::map here.
>>
>>   FunctionMapTy FunctionMap;
>> public:
>>   static CallGraph *get() { return G; }
>>   static void addTU(const ASTUnit &AST);
>>
>> Is there a reason you are using static methods/instance variables?   
>> I know
>> this is a small program, but if you wanted to design this to be more
>> reusable I think you'll want to avoid using them in this way.
>
> I originally wanted to use Singleton pattern. Now I changed to a plain
> implementation.
>
>>
>>   idx::Program &getProgram() { return Prog; }
>>   CallGraphNode *getOrInsertFunction(const idx::Entity * F);
>> };
>> }
>> #endif
>>
>> #include "CallGraph.h"
>> #include "clang/Basic/FileManager.h"
>> #include "clang/Index/TranslationUnit.h"
>> #include "llvm/Support/CommandLine.h"
>> #include "llvm/Support/raw_ostream.h"
>> using namespace clang;
>> using namespace idx;
>> static llvm::cl::list<std::string>
>> InputFilenames(llvm::cl::Positional, llvm::cl::desc("<input AST  
>> files>"));
>> class TUnit : public TranslationUnit {
>> public:
>>   TUnit(ASTUnit *ast, const std::string &filename)
>>     : AST(ast), Filename(filename) {}
>>   ASTContext &getASTContext() { return AST->getASTContext(); }
>>   llvm::OwningPtr<ASTUnit> AST;
>>   std::string Filename;
>> };
>> int main(int argc, char **argv) {
>>   llvm::cl::ParseCommandLineOptions(argc, argv, "clang analyzer");
>>
>> "clang analyzer" is the name people associate with the static  
>> analyzer.  I'd
>> just use 'clang-callgraph', since that is what this tool does.
>>
>>   FileManager FileMgr;
>>   std::vector<TUnit*> TUnits;
>>   if (InputFilenames.empty())
>>     return 0;
>>   for (unsigned i = 0, e = InputFilenames.size(); i != e; ++i) {
>>     const std::string &InFile = InputFilenames[i];
>>     std::string ErrMsg;
>>     llvm::OwningPtr<ASTUnit> AST;
>>     AST.reset(ASTUnit::LoadFromPCHFile(InFile, FileMgr, &ErrMsg));
>>     if (!AST) {
>>       llvm::errs() << "[" << InFile << "] error: " << ErrMsg << '\n';
>>       return 1;
>>     }
>>     TUnit *TU = new TUnit(AST.take(), InFile);
>>     TUnits.push_back(TU);
>>   }
>>   for (unsigned i = 0, e = TUnits.size(); i != e; ++i)
>>     CallGraph::addTU(*(TUnits[i]->AST));
>>   CallGraph *CG = CallGraph::get();
>> }
>>
>> I think this has the potential to be a nice little tool, but I  
>> think in
>> order for it to be useful it should produce some basic output.  I  
>> think
>> printing out the callgraph would be nice.  You can also define some
>> GraphTraits for the CallGraph (similar to what we do in
>> clang/include/AST/CFG.h) to get automatic GraphViz visualization.
>> All in all I think this is cool, I can look forward to playing  
>> around with
>> it!
> <clang-wpa.cpp><CallGraph.cpp><CallGraph.h>

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20090715/c0c0e76f/attachment.html>