[LLVMdev] How to duplicate a function?

Duncan Sands baldrick at free.fr
Tue Sep 27 00:47:24 PDT 2011


Hi Shawn, did you build LLVM with assertions enabled?  You should, since you
then get helpful error messages rather than obscure crashes.  Also, take a look
at ArgumentPromotion.cpp as an example of the kind of thing you are trying to
do.

Ciao, Duncan.

> Sorry for the inconvenient about the previous post. The files were not attached.
> So I put them here again.
>
> I am a newbie in LLVM and I am trying to replace the function like:
>
> old function               ||            new function
> ==============================
> =========
> int haha(int a) {                        int haha(int a, char* ID) {
>
>                              ===>
>
>
> }                                            }
>
> Of course in the newly replaced function "int haha(int, char*ID)", I want to
> insert some instrumentation code.
>
> Here is my code that I am working on till now and it generates segmentation
> fault in the place I comment with "//////////////////////"
> Can you help me? Any advice will be helpful because I am a beginner in llvm.
>
>
> Thank you in advance.
> Shawn.
>
>
> duplicateFunction.cpp
> =============================================================================
>
> //===- duplicateFunction.cpp - Writing an LLVM Pass -----------------------===//
> //
> //                     The LLVM Compiler Infrastructure
> //
> //===----------------------------------------------------------------------===//
> //
> // This file implements the LLVM duplicating function pass.
> // It starts by computing a new prototype for the function,
> // which is the same as the old function, but has an extra argument.
> //
> //===----------------------------------------------------------------------===//
> #include "llvm/Transforms/Utils/Cloning.h"
> #include "llvm/Pass.h"
> #include "llvm/Function.h"
> #include "llvm/Module.h"
> #include "llvm/CallingConv.h"
> #include "llvm/DerivedTypes.h"
> #include "llvm/InstrTypes.h"
> #include "llvm/Constants.h"
> #include "llvm/Instructions.h"
> #include "llvm/Support/raw_ostream.h"
> #include "llvm/Transforms/Utils/BasicBlockUtils.h"
> #include "llvm/BasicBlock.h"
> #include "llvm/Support/Debug.h"
> #include "llvm/Support/CallSite.h"
> using namespace llvm;
>
> namespace {
>      Constant *f;
>      Function *Fn;
>      FunctionType *FTy;
>      Type *RetTy;
>      std::vector<Type*> Params;
>      class DP : public FunctionPass {
>
>        public:
>          static char ID;
>
>          DP() : FunctionPass(ID) {}
>
>          virtual bool doInitialization(Module &M);
>          virtual bool runOnFunction(Function &F);
>          virtual bool doFinalization(Module &mdl) {
>              mdl.dump();
>              return true;
>          }
>      }; /* class */
> }
>
> char DP::ID = 0;
> static RegisterPass<DP> IC("duplicateFunction", "Duplicate Function Pass");
>
>
> bool DP::doInitialization(Module &M) {
>
>      // find the function that we want to change.
>      Fn = M.getFunction("haha");
>
>      // Start by computing a new prototype for the function, which is the same as
>      // the old function, but has an extra argument.
>      FTy = Fn->getFunctionType();
>
>      // Find out the return value.
>      RetTy = FTy->getReturnType();
>
>      // set the calling convention to C.
>      // so, we interoperate with C Code properly.
>      Function *tmp = cast<Function>(Fn);
>      tmp->setCallingConv(CallingConv::C);
>
>      return true;
> }
>
> bool DP::runOnFunction(Function &F) {
> #if 0
>      Value *param;
>
>      // Find the instruction before which you want to insert the function call
>      Instruction *nextInstr = F.back().getTerminator();
>
>      // Create the actual parameter for the function call
>      param = ConstantInt::get(Type::getInt32Ty(F.getContext()), 333);
>
>      // create and insert the function call
>      //CallInst::Create(f, param, "", nextInstr);
>      CallInst::Create(Fn, param, "", nextInstr);
>
>      // indicates that we changed the code
>      //return true;
> #endif
>      Type *NRetTy;
>
>      std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
>      FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false);
>
>      // Create the new function body and insert it into the module...
>      Function *NF = Function::Create(NFTy, Fn->getLinkage());
>      NF->copyAttributesFrom(Fn);
>      Fn->getParent()->getFunctionList().insert(Fn, NF);
>      NF->takeName(Fn);
>
>      for (Function::arg_iterator AI=F.arg_begin(), AE=F.arg_end(),
> NAI=NF->arg_begin();
>           AI != AE; ++AI, ++NAI) {
>          NAI->takeName(AI);
>      }
>
>      // Since we have now create the new function, splice the body of the old
>      // function right into the new function, leaving the old rotting hulk of the
>      // function empty.
>      NF->getBasicBlockList().splice(NF->begin(), F.getBasicBlockList());
>
>      llvm::Value *Globals = --NF->arg_end();
>      Globals->setName("IOCallIDs");
>
>      // Now, exploit all return instructions.
>      for (Function::iterator BI = NF->begin(), BE = NF->end(); BI != BE; ++BI) {
>          if (ReturnInst *RI =
> llvm::dyn_cast<llvm::ReturnInst>(BI->getTerminator())) {
>              // Don't support functions that have multiple return values.
>              assert(RI->getNumOperands() < 2);
>
>              // Insert a new load instruction to return.
>
> /////////////////////////////////////////////////////////////////////////////////////
> HERE, GENERATE ERROR
>
> /////////////////////////////////////////////////////////////////////////////////////
>              Value *Load = new llvm::LoadInst(Globals, "globalsret", RI);
>
> /////////////////////////////////////////////////////////////////////////////////////
>              // Return type is void
>              if ( RetTy->isVoidTy() ) {
> //                ReturnInst::Create(Load, 0, RI); // Return void
>                  ReturnInst::Create(F.getContext(), 0, RI); // Return void
>                  RI->getParent()->getInstList().erase(RI);
>              } else {
>                  // Start with an empty struct.
>                  Value *Return = ConstantAggregateZero::get(NRetTy);
>                  DEBUG(errs() << "Return: " << *Return->getType() << '\n');
>
>                  // Insert the original return value in field 0
>                  Return = InsertValueInst::Create(Return, RI->getOperand(0), 0,
> "ret", RI);
>                  DEBUG(errs() << "Return: " << *Return->getType() << '\n');
>
>                  // Insert the globals return value in field 1
>                  Return = InsertValueInst::Create(Return, Load, 1, "ret", RI);
> // <- maybe useless
>                  DEBUG(errs() << "Return: " << *Return->getType() << '\n');
>
>                  // Update the return instruction
>                  RI->setOperand(0, Return);
>              }
>          } // if
>      } // for
>
>      // Replace all uses of the old arguments with the new arguments.
>      for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(), NI =
> NF->arg_begin();
>           I != E; ++I, ++NI) {
>          I->replaceAllUsesWith(NI);
>      }
>
> #if 1
>      // Replace all callers
>      while ( !F.use_empty() ) {
>          CallSite CS(F.use_back());
>          Instruction *Call = CS.getInstruction();
> //        Function *CallingF = Call->getParent()->getParent();
>
>          // Get the global struct in our caller.
>          //Value* CallerGlobals = ModifyFunctionRecursive(CallingF).first;
>          Value* CallerGlobals = NULL; // <- This should be modified later.
>
>          // Copy the existing arguments
>          std::vector<Value*> Args;
>          Args.reserve(CS.arg_size());
>          CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
>
>          // First, copy regular arguments
>          for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++AI) {
>              Args.push_back(*AI);
>          }
>          // Then, insert the new argument
>          Args.push_back(CallerGlobals);
>          // Lastly, copy any remaining varargs
>          for (; AI != AE; ++AI) {
>              Args.push_back(*AI);
>          }
>
>          Instruction *New;
>          Instruction *Before = Call;
>          if ( InvokeInst *II = dyn_cast<InvokeInst>(Call) ) {
>              New = InvokeInst::Create(NF, II->getNormalDest(),
> II->getUnwindDest(), Args, "", Before);
>              cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
> //          cast<InvokeInst>(New)->setParamAttrs(CS.getParamAttrs());
>              cast<InvokeInst>(New)->setAttributes(CS.getAttributes());
>          } else {
>              New = CallInst::Create(NF, Args, "", Before);
>              cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
> //          cast<CallInst>(New)->setParamAttrs(CS.getParamAttrs());
>              cast<CallInst>(New)->setAttributes(CS.getAttributes());
>              if ( cast<CallInst>(Call)->isTailCall() ) {
>                  cast<CallInst>(New)->setTailCall();
>              }
>          }
>
>          if (Call->hasName()) {
>              New->takeName(Call);
>          } else {
>              New->setName(NF->getName() + ".ret");
>          }
>
>          Value *GlobalsRet;
>          if ( Call->getType()->isVoidTy() ) {
>              // The original function returned nothing, so the new function returns
>              // only the globals
>              GlobalsRet = New;
>          } else {
>              // Split the values
>              Value *OrigRet = ExtractValueInst::Create(New, 0, "origret", Before);
>              GlobalsRet     = ExtractValueInst::Create(New, 1, "globalsret",
> Before);
>              // Replace all the uses of the original result
>              Call->replaceAllUsesWith(OrigRet);
>          }
>
>          // Now, store the globals back
>          new StoreInst(GlobalsRet, CallerGlobals, Before);
>
>          DEBUG(errs() << "Call " << *Call << " replaced, function is now " <<
> *Call->getParent()->getParent() << "\n");
>
>          // Finally, remove the old call from the program, reducing the
> use-count of F.
>          Call->eraseFromParent();
>
>      } // while
> #endif
>      return true;
> }
>
>
> test.c
> ====================================================================================
> #include <stdio.h>
> #include <stdlib.h>
>
> int v[200];
> int haha(int);
>
> int main()
> {
>      int i;
>      int n=100;
>
>      if ( !haha(n) )
>          exit(-1);
>
>      return 1;
> }
>
> int haha(int n)
> //int haha(int n, char* IOCallIDs)
> {
>      int i;
>      for (i=1; i<n; i++)
>          v[i] = v[i-1] + v[i];
>
>      printf ("hahaha\n");
>      return 1;
> }
>
> Makefile
> ====================================================================================
> LLVM_CONFIG?=llvm-config
>
> # location of the source
> # useful if you want separate source and object directories.
> SRC_DIR?=$(PWD)
>
> #ifndef VERBOSE
> #    QUIET:=@
> #endif
>
> COMMON_FLAGS=-Wall -Wextra #-fvisibility=hidden
> CFLAGS+=$(COMMON_FLAGS) $(shell $(LLVM_CONFIG) --cflags)
> CXXFLAGS+=$(COMMON_FLAGS) $(shell $(LLVM_CONFIG) --cxxflags)
>
> #ifeq ($(shell uname),Darwin)
> #LOADABLE_MODULE_OPTIONS=-bundle -undefined dynamic_lookup
> #else
> LOADABLE_MODULE_OPTIONS=-shared -Wl,-O1
> #endif
>
> TEST_C=test.c
> TEST_FILE=$(subst .c,.s, $(TEST_C))
> PLUGIN=duplicateFunction.so
> PLUGIN_OBJECTS=duplicateFunction.o
>
> ALL_OBJECTS=$(PLUGIN_OBJECTS)
> ALL_TARGETS=$(PLUGIN) $(TEST_FILE)
>
> CPP_OPTIONS+=$(CPPFLAGS) $(shell $(LLVM_CONFIG) --cppflags) -MD -MP -I$(SRC_DIR)
>
> LD_OPTIONS+=$(LDFLAGS) $(shell $(LLVM_CONFIG) --ldflags)
>
> all: $(ALL_TARGETS)
>
> %.o : $(SRC_DIR)/%.cpp
>      @echo Compiling $*.cpp
>      $(QUIET)$(CXX) -c $(CPP_OPTIONS) $(CXXFLAGS) $<
>
> $(PLUGIN): $(PLUGIN_OBJECTS)
>      @echo Linking $@
>      $(QUIET)$(CXX) -o $@ $(LOADABLE_MODULE_OPTIONS) $(CXXFLAGS) $(LD_OPTIONS)
> $(PLUGIN_OBJECTS)
>
> RUN_FLAGS=-duplicateFunction
>
> $(TEST_FILE): $(TEST_C)
>      clang -g -O3 -S -emit-llvm $^
> #    clang -g -O0 -S -emit-llvm $^
> #     clang -O0 -S -emit-llvm $^
> run:
>      opt -load ./$(PLUGIN) $(RUN_FLAGS) < $(TEST_FILE) > /dev/null 2> after.s
> #    opt -load ./$(PLUGIN) $(RUN_FLAGS) < $(TEST_FILE) > /dev/null
>
> clean:
>      $(QUIET)rm -f $(ALL_OBJECTS) *.d $(PLUGIN) $(TEST_FILE)
>
>
> -include $(ALL_OBJECTS:.o=.d)
>
>
>
>
>
>
>
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev




More information about the llvm-dev mailing list