[LLVMdev] How to duplicate a function?

Shawn Kim shawn.subscribe at gmail.com
Fri Sep 16 11:25:38 PDT 2011


Hi all,

Sorry for the inconvenient about the previous post. The files were not
attached. So I put them here again.

I am a newbie in LLVM and I am trying to replace the function like:

old function               ||            new function
==============================
=========
int haha(int a) {                        int haha(int a, char* ID) {

                            ===>


}                                            }

Of course in the newly replaced function "int haha(int, char*ID)", I want to
insert some instrumentation code.

Here is my code that I am working on till now and it generates segmentation
fault in the place I comment with "//////////////////////"
Can you help me? Any advice will be helpful because I am a beginner in llvm.


Thank you in advance.
Shawn.


duplicateFunction.cpp
=============================================================================

//===- duplicateFunction.cpp - Writing an LLVM Pass
-----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
//===----------------------------------------------------------------------===//
//
// This file implements the LLVM duplicating function pass.
// It starts by computing a new prototype for the function,
// which is the same as the old function, but has an extra argument.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/InstrTypes.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/BasicBlock.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CallSite.h"
using namespace llvm;

namespace {
    Constant *f;
    Function *Fn;
    FunctionType *FTy;
    Type *RetTy;
    std::vector<Type*> Params;
    class DP : public FunctionPass {

      public:
        static char ID;

        DP() : FunctionPass(ID) {}

        virtual bool doInitialization(Module &M);
        virtual bool runOnFunction(Function &F);
        virtual bool doFinalization(Module &mdl) {
            mdl.dump();
            return true;
        }
    }; /* class */
}

char DP::ID = 0;
static RegisterPass<DP> IC("duplicateFunction", "Duplicate Function Pass");


bool DP::doInitialization(Module &M) {

    // find the function that we want to change.
    Fn = M.getFunction("haha");

    // Start by computing a new prototype for the function, which is the
same as
    // the old function, but has an extra argument.
    FTy = Fn->getFunctionType();

    // Find out the return value.
    RetTy = FTy->getReturnType();

    // set the calling convention to C.
    // so, we interoperate with C Code properly.
    Function *tmp = cast<Function>(Fn);
    tmp->setCallingConv(CallingConv::C);

    return true;
}

bool DP::runOnFunction(Function &F) {
#if 0
    Value *param;

    // Find the instruction before which you want to insert the function
call
    Instruction *nextInstr = F.back().getTerminator();

    // Create the actual parameter for the function call
    param = ConstantInt::get(Type::getInt32Ty(F.getContext()), 333);

    // create and insert the function call
    //CallInst::Create(f, param, "", nextInstr);
    CallInst::Create(Fn, param, "", nextInstr);

    // indicates that we changed the code
    //return true;
#endif
    Type *NRetTy;

    std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
    FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params,
false);

    // Create the new function body and insert it into the module...
    Function *NF = Function::Create(NFTy, Fn->getLinkage());
    NF->copyAttributesFrom(Fn);
    Fn->getParent()->getFunctionList().insert(Fn, NF);
    NF->takeName(Fn);

    for (Function::arg_iterator AI=F.arg_begin(), AE=F.arg_end(),
NAI=NF->arg_begin();
         AI != AE; ++AI, ++NAI) {
        NAI->takeName(AI);
    }

    // Since we have now create the new function, splice the body of the old
    // function right into the new function, leaving the old rotting hulk of
the
    // function empty.
    NF->getBasicBlockList().splice(NF->begin(), F.getBasicBlockList());

    llvm::Value *Globals = --NF->arg_end();
    Globals->setName("IOCallIDs");

    // Now, exploit all return instructions.
    for (Function::iterator BI = NF->begin(), BE = NF->end(); BI != BE;
++BI) {
        if (ReturnInst *RI =
llvm::dyn_cast<llvm::ReturnInst>(BI->getTerminator())) {
            // Don't support functions that have multiple return values.
            assert(RI->getNumOperands() < 2);

            // Insert a new load instruction to return.

/////////////////////////////////////////////////////////////////////////////////////
HERE, GENERATE ERROR

/////////////////////////////////////////////////////////////////////////////////////
            Value *Load = new llvm::LoadInst(Globals, "globalsret", RI);

/////////////////////////////////////////////////////////////////////////////////////

            // Return type is void
            if ( RetTy->isVoidTy() ) {
//                ReturnInst::Create(Load, 0, RI); // Return void
                ReturnInst::Create(F.getContext(), 0, RI); // Return void
                RI->getParent()->getInstList().erase(RI);
            } else {
                // Start with an empty struct.
                Value *Return = ConstantAggregateZero::get(NRetTy);
                DEBUG(errs() << "Return: " << *Return->getType() << '\n');

                // Insert the original return value in field 0
                Return = InsertValueInst::Create(Return, RI->getOperand(0),
0, "ret", RI);
                DEBUG(errs() << "Return: " << *Return->getType() << '\n');

                // Insert the globals return value in field 1
                Return = InsertValueInst::Create(Return, Load, 1, "ret",
RI); // <- maybe useless
                DEBUG(errs() << "Return: " << *Return->getType() << '\n');

                // Update the return instruction
                RI->setOperand(0, Return);
            }
        } // if
    } // for

    // Replace all uses of the old arguments with the new arguments.
    for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(), NI =
NF->arg_begin();
         I != E; ++I, ++NI) {
        I->replaceAllUsesWith(NI);
    }

#if 1
    // Replace all callers
    while ( !F.use_empty() ) {
        CallSite CS(F.use_back());
        Instruction *Call = CS.getInstruction();
//        Function *CallingF = Call->getParent()->getParent();

        // Get the global struct in our caller.
        //Value* CallerGlobals = ModifyFunctionRecursive(CallingF).first;
        Value* CallerGlobals = NULL; // <- This should be modified later.

        // Copy the existing arguments
        std::vector<Value*> Args;
        Args.reserve(CS.arg_size());
        CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();

        // First, copy regular arguments
        for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++AI) {
            Args.push_back(*AI);
        }
        // Then, insert the new argument
        Args.push_back(CallerGlobals);
        // Lastly, copy any remaining varargs
        for (; AI != AE; ++AI) {
            Args.push_back(*AI);
        }

        Instruction *New;
        Instruction *Before = Call;
        if ( InvokeInst *II = dyn_cast<InvokeInst>(Call) ) {
            New = InvokeInst::Create(NF, II->getNormalDest(),
II->getUnwindDest(), Args, "", Before);
            cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
//          cast<InvokeInst>(New)->setParamAttrs(CS.getParamAttrs());
            cast<InvokeInst>(New)->setAttributes(CS.getAttributes());
        } else {
            New = CallInst::Create(NF, Args, "", Before);
            cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
//          cast<CallInst>(New)->setParamAttrs(CS.getParamAttrs());
            cast<CallInst>(New)->setAttributes(CS.getAttributes());
            if ( cast<CallInst>(Call)->isTailCall() ) {
                cast<CallInst>(New)->setTailCall();
            }
        }

        if (Call->hasName()) {
            New->takeName(Call);
        } else {
            New->setName(NF->getName() + ".ret");
        }

        Value *GlobalsRet;
        if ( Call->getType()->isVoidTy() ) {
            // The original function returned nothing, so the new function
returns
            // only the globals
            GlobalsRet = New;
        } else {
            // Split the values
            Value *OrigRet = ExtractValueInst::Create(New, 0, "origret",
Before);
            GlobalsRet     = ExtractValueInst::Create(New, 1, "globalsret",
Before);
            // Replace all the uses of the original result
            Call->replaceAllUsesWith(OrigRet);
        }

        // Now, store the globals back
        new StoreInst(GlobalsRet, CallerGlobals, Before);

        DEBUG(errs() << "Call " << *Call << " replaced, function is now " <<
*Call->getParent()->getParent() << "\n");

        // Finally, remove the old call from the program, reducing the
use-count of F.
        Call->eraseFromParent();

    } // while
#endif
    return true;
}


test.c
====================================================================================
#include <stdio.h>
#include <stdlib.h>

int v[200];
int haha(int);

int main()
{
    int i;
    int n=100;

    if ( !haha(n) )
        exit(-1);

    return 1;
}

int haha(int n)
//int haha(int n, char* IOCallIDs)
{
    int i;
    for (i=1; i<n; i++)
        v[i] = v[i-1] + v[i];

    printf ("hahaha\n");
    return 1;
}

Makefile
====================================================================================
LLVM_CONFIG?=llvm-config

# location of the source
# useful if you want separate source and object directories.
SRC_DIR?=$(PWD)

#ifndef VERBOSE
#    QUIET:=@
#endif

COMMON_FLAGS=-Wall -Wextra #-fvisibility=hidden
CFLAGS+=$(COMMON_FLAGS) $(shell $(LLVM_CONFIG) --cflags)
CXXFLAGS+=$(COMMON_FLAGS) $(shell $(LLVM_CONFIG) --cxxflags)

#ifeq ($(shell uname),Darwin)
#LOADABLE_MODULE_OPTIONS=-bundle -undefined dynamic_lookup
#else
LOADABLE_MODULE_OPTIONS=-shared -Wl,-O1
#endif

TEST_C=test.c
TEST_FILE=$(subst .c,.s, $(TEST_C))
PLUGIN=duplicateFunction.so
PLUGIN_OBJECTS=duplicateFunction.o

ALL_OBJECTS=$(PLUGIN_OBJECTS)
ALL_TARGETS=$(PLUGIN) $(TEST_FILE)

CPP_OPTIONS+=$(CPPFLAGS) $(shell $(LLVM_CONFIG) --cppflags) -MD -MP
-I$(SRC_DIR)

LD_OPTIONS+=$(LDFLAGS) $(shell $(LLVM_CONFIG) --ldflags)

all: $(ALL_TARGETS)

%.o : $(SRC_DIR)/%.cpp
    @echo Compiling $*.cpp
    $(QUIET)$(CXX) -c $(CPP_OPTIONS) $(CXXFLAGS) $<

$(PLUGIN): $(PLUGIN_OBJECTS)
    @echo Linking $@
    $(QUIET)$(CXX) -o $@ $(LOADABLE_MODULE_OPTIONS) $(CXXFLAGS)
$(LD_OPTIONS) $(PLUGIN_OBJECTS)

RUN_FLAGS=-duplicateFunction

$(TEST_FILE): $(TEST_C)
    clang -g -O3 -S -emit-llvm $^
#    clang -g -O0 -S -emit-llvm $^
#     clang -O0 -S -emit-llvm $^
run:
    opt -load ./$(PLUGIN) $(RUN_FLAGS) < $(TEST_FILE) > /dev/null 2> after.s
#    opt -load ./$(PLUGIN) $(RUN_FLAGS) < $(TEST_FILE) > /dev/null

clean:
    $(QUIET)rm -f $(ALL_OBJECTS) *.d $(PLUGIN) $(TEST_FILE)


-include $(ALL_OBJECTS:.o=.d)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20110916/15f49b43/attachment.html>


More information about the llvm-dev mailing list