[cfe-dev] Problem with retrieving the BinaryOperator RHS end location
Nat! via cfe-dev
cfe-dev at lists.llvm.org
Sun May 5 04:33:32 PDT 2019
I think this is the simplest code, that exhibits my problem.
This code rewrites direct struct access like p->x
into a C function-call getX( p).
```
//------------------------------------------------------------------------------
// Tooling sample. Demonstrates:
//
// * How to write a simple source tool using libTooling.
// * How to use RecursiveASTVisitor to find interesting AST nodes.
// * How to use the Rewriter API to rewrite the source code.
//
// Eli Bendersky (eliben at gmail.com)
// This code is in the public domain
//------------------------------------------------------------------------------
#include <sstream>
#include <string>
#include "clang/AST/AST.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Frontend/ASTConsumers.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Rewrite/Core/Rewriter.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace clang::driver;
using namespace clang::tooling;
static llvm::cl::OptionCategory ToolingSampleCategory("Tooling Sample");
// By implementing RecursiveASTVisitor, we can specify which AST nodes
// we're interested in by overriding relevant methods.
class MyASTVisitor : public RecursiveASTVisitor<MyASTVisitor> {
public:
MyASTVisitor(Rewriter &R) : TheRewriter(R) {}
bool VisitMemberExpr(MemberExpr *Expr) {
SourceManager *SM;
int length;
const char *startBuf;
const char *endBuf;
const char *opBuf;
SM = &TheRewriter.getSourceMgr();
startBuf = SM->getCharacterData( Expr->getBeginLoc());
opBuf = SM->getCharacterData( Expr->getOperatorLoc());
endBuf = SM->getCharacterData( Expr->getEndLoc());
length = endBuf - startBuf;
std::string front( startBuf, opBuf - startBuf);
length += Lexer::MeasureTokenLength( Expr->getEndLoc(), *SM,
TheRewriter.getLangOpts());
std::string origin( startBuf, length);
std::string replace;
replace = std::string( "getX(") + front + std::string( ")");
fprintf( stderr, "rewrite: \"%s\" -> \"%s\"\n", origin.c_str(),
replace.c_str());
TheRewriter.ReplaceText( Expr->getBeginLoc(), length, replace);
return true;
}
private:
Rewriter &TheRewriter;
};
// Implementation of the ASTConsumer interface for reading an AST produced
// by the Clang parser.
class MyASTConsumer : public ASTConsumer {
public:
MyASTConsumer(Rewriter &R) : Visitor(R) {}
// Override the method that gets called for each parsed top-level
// declaration.
bool HandleTopLevelDecl(DeclGroupRef DR) override {
for (DeclGroupRef::iterator b = DR.begin(), e = DR.end(); b != e;
++b) {
// Traverse the declaration using our AST visitor.
Visitor.TraverseDecl(*b);
(*b)->dump();
}
return true;
}
private:
MyASTVisitor Visitor;
};I changed my theory is,
// For each source file provided to the tool, a new FrontendAction is
created.
class MyFrontendAction : public ASTFrontendAction {
public:
MyFrontendAction() {}
void EndSourceFileAction() override {
SourceManager &SM = TheRewriter.getSourceMgr();
llvm::errs() << "** EndSourceFileAction for: "
<< SM.getFileEntryForID(SM.getMainFileID())->getName()
<< "\n";
// Now emit the rewritten buffer.
TheRewriter.getEditBuffer(SM.getMainFileID()).write(llvm::outs());
}
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
StringRef file) override {
llvm::errs() << "** Creating AST consumer for: " << file << "\n";
TheRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
return llvm::make_unique<MyASTConsumer>(TheRewriter);
}
private:
Rewriter TheRewriter;
};
int main(int argc, const char **argv) {
CommonOptionsParser op(argc, argv, ToolingSampleCategory);
ClangTool Tool(op.getCompilations(), op.getSourcePathList());
return Tool.run(newFrontendActionFactory<MyFrontendAction>().get());
}
```
With input
```
struct _x
{
struct _x *x;
int y;
};
int getXXY( struct _x *p)
{
return( p->x->x->y);
}
```
it produces
```
RecordDecl 0x27cdad8 </usr/local/llvm/srcL/llvm-clang-samples/z.c:1:1,
line:5:1> line:1:8 struct _x definition
|-FieldDecl 0x282a620 <line:3:4, col:16> col:16 x 'struct _x *'
`-FieldDecl 0x282a680 <line:4:4, col:15> col:15 y 'int'
rewrite: "p->x->x->y" -> "getX(p->x->x)"
rewrite: "p->x->x" -> "getX(p->x)"
rewrite: "p->x" -> "getX(p)"
FunctionDecl 0x282a7e0 </usr/local/llvm/srcL/llvm-clang-samples/z.c:8:1,
line:11:1> line:8:7 getXXY 'int (struct _x *)'
|-ParmVarDecl 0x282a6f0 <col:15, col:26> col:26 used p 'struct _x *'
`-CompoundStmt 0x282aa00 <line:9:1, line:11:1>
`-ReturnStmt 0x282a9f0 <line:10:4, col:22>
`-ImplicitCastExpr 0x282a9d8 <col:10, col:22> 'int' <LValueToRValue>
`-ParenExpr 0x282a9b8 <col:10, col:22> 'int' lvalue
`-MemberExpr 0x282a988 <col:12, col:21> 'int' lvalue ->y 0x282a680
`-ImplicitCastExpr 0x282a970 <col:12, col:18> 'struct _x *'
<LValueToRValue>
`-MemberExpr 0x282a940 <col:12, col:18> 'struct _x *'
lvalue ->x 0x282a620
`-ImplicitCastExpr 0x282a928 <col:12, col:15> 'struct _x
*' <LValueToRValue>
`-MemberExpr 0x282a8f8 <col:12, col:15> 'struct _x *'
lvalue ->x 0x282a620
`-ImplicitCastExpr 0x282a8e0 <col:12> 'struct _x *'
<LValueToRValue>
`-DeclRefExpr 0x282a8c0 <col:12> 'struct _x *'
lvalue ParmVar 0x282a6f0 'p' 'struct _x *'
** EndSourceFileAction for: /usr/local/llvm/srcL/llvm-clang-samples/z.c
struct _x
{
struct _x *x;
int y;
};
int getXXY( struct _x *p)
{
return( getX(p)(p->x)>x->x));
}
```
From a few more tests along the way, my current pet theory is, that the
change from member access to a function call is tripping something up.
If I rewrite just the righthand side of the expression, it works OK.
Ciao
Nat!
On 04.05.19 14:28, Stephen Kelly via cfe-dev wrote:
> On 03/05/2019 14:28, Nat! via cfe-dev wrote:
>> It turns out the fix suggestion - at least as I implemented it -
>> works for simple cases, but not in general.
>>
>> ```
>> endBuf = SM->getCharacterData( Stmt->getRHS()->getEndLoc());
>> endBuf += Lexer::MeasureTokenLength( Stmt->getEndLoc(), *SM,
>> TheRewriter.getLangOpts());
>> ```
>>
>> I believe the problem is that the SourceManager I am using
>> to "measure" the length is looking at the unmodified sourcecode. But
>> the RewriteBuffer may already contain changes.
>>
>> So in my example if I am looking at `x = yyy` at the source but
>> in the rewrite buffer its now `x=yyy_renamed` it will not take the
>> extra characters into account.
>>
>> I should be measuring the contents of the RewriteBuffer instead,
>> with likely another SourceManager, but I lack the expertise to set
>> this up.
>
> Can you post a more-complete sscce example of what you are trying to do?
>
> Thanks,
>
> Stephen
>
> _______________________________________________
> cfe-dev mailing list
> cfe-dev at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-dev
More information about the cfe-dev
mailing list