[LLVMdev] Obfuscation with LLVM
Evgeny Podlepaev
evgeny4u at gmail.com
Wed May 17 15:03:46 PDT 2006
Hi all,
I was trying to implement an obfuscation tool for C-code on the basis of
LLVM. I got a prototype of the simple obfuscation transformation which
converting control flow graph to something like a state machine. I am not
sure I will have time to work on extending further this tool with new
transformations like opaque predicates and decided to put here source code I
have by now with hope that it can be helpful to somebody. I am newbie with
LLVM and am sure there are a lot of bugs in current implementation, but I
have tested it with complex enough C-program and in my case it worked. Sorry
for the poor English and grammar.
Regards,
Evgeny Podlepaev
---------------------------------------------
MakeDispatcherPass.h---------------------------------------------
#include <map>
#include <string>
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
using namespace llvm;
class MakeDispatcherPass : public FunctionPass
{
public:
virtual bool runOnFunction( Function& currFunction );
private:
typedef std::map< BasicBlock*, unsigned > BBindexBBMap;
static unsigned IndexSourceBasicBlocks( Function& function,
BBindexBBMap& indexBBMap );
static BasicBlock* CreateNewEntryBlock( const std::string& name,
BasicBlock *oldEntryBB );
static void LinkBasicBlockToDispatcher( BasicBlock* basicBlock,
BasicBlock* dispatcherBlock, Value* statePtr, BBindexBBMap& indexBBMap );
static void MovePHINodesToDispatcher( BBindexBBMap& indexBBMap,
BasicBlock* dispatcherBB );
static void ConvertSwitch( Function& function );
static BasicBlock* ProcessCase( SwitchInst* switchInst, int caseIdx,
BasicBlock* prevBB, Value* testValuePtr );
static void ReduceTempVarsLifetime( Function& function );
static bool IsUsedOutsideParentBlock( Instruction* value );
static const std::string PassName;
};
---------------------------------------------
MakeDispatcherPass.cpp---------------------------------------------
#include <map>
#include <vector>
#include <cassert>
#include <iostream>
#include <exception>
#include "llvm/Type.h"
#include "llvm/Module.h"
#include "llvm/Function.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/InstIterator.h"
#include "MakeDispatcherPass.h"
//
// Static variables.
//
const std::string MakeDispatcherPass::PassName = "MakeDispatcherPass";
//
// Public methods.
//
bool MakeDispatcherPass::runOnFunction( Function& currFunction )
{
DEBUG( std::cerr << PassName << ": Processing " << currFunction.getName()
<< " ...\n" );
bool madeChange = true;
// Because of changes which we will make in control flow graph it is
necessary
// to get rid of temporary variables used outside their parent basic
block.
ReduceTempVarsLifetime( currFunction );
// Replace all SwitchInst instructions with chained branch instructions
// in order to simplify the dispatcher embedding.
//
// NOTE: We can use LowerSwitch Pass instead of our implementation.
ConvertSwitch( currFunction );
// Keep a reference to the old entry basic block.
BasicBlock* oldEntryBB = &currFunction.getEntryBlock();
// Collect basic blocks to be reordered and assign unique index for
each.
BBindexBBMap indexBBMap;
unsigned firstBBIndex = IndexSourceBasicBlocks( currFunction, indexBBMap
);
// Create basic blocks for initialization and dispatching.
BasicBlock* disInitBB = CreateNewEntryBlock( "dispatcher_init",
oldEntryBB );
BasicBlock* disMainBB = new BasicBlock( "dispatcher_main",
&currFunction, oldEntryBB );
// Add instructions that allocate and initialize the dispatcher's state
variable
// to the initialization block.
AllocaInst* statePtr = new AllocaInst( Type::UIntTy, 0, "state",
disInitBB );
new StoreInst( ConstantUInt::get( Type::UIntTy, firstBBIndex ),
statePtr, disInitBB );
new BranchInst( disMainBB, disInitBB ); // Connection to the
dispatcher's main block.
// Add instructions to the dispatcher's main block that implement
switching
// between the basic blocks depending on the state variable's value.
LoadInst* loadState = new LoadInst( statePtr, "next", disMainBB );
SwitchInst* switchInst = new SwitchInst( loadState, oldEntryBB,
unsigned( indexBBMap.size() ), disMainBB );
for( BBindexBBMap::iterator idxBBPair = indexBBMap.begin(); idxBBPair !=
indexBBMap.end(); idxBBPair++ )
{
switchInst->addCase( ConstantUInt::get( Type::UIntTy,
idxBBPair->second ), idxBBPair->first );
}
// Unlink all source basic blocks and relink them to the dispatcher's
main block.
for( BBindexBBMap::iterator idxBBPair = indexBBMap.begin(); idxBBPair !=
indexBBMap.end(); idxBBPair++ )
{
LinkBasicBlockToDispatcher( idxBBPair->first, disMainBB, statePtr,
indexBBMap );
}
// Since the predecessors of source basic blocks was replaced by the
dispatcher's
// basic block it is necessary to update all PHI nodes in source blocks.
PHI is a
// special artificial definition for supporting the Static Single
Assignment form
// used for data flow analysis (
http://gcc.gnu.org/onlinedocs/gccint/SSA.html).
MovePHINodesToDispatcher( indexBBMap, disMainBB );
return madeChange;
}
//
// Private methods.
//
unsigned MakeDispatcherPass::IndexSourceBasicBlocks( Function& function,
BBindexBBMap& indexBBMap )
{
unsigned startIdx = 0;
// Keep all source basic blocks, including entry and exit blocks.
unsigned idx = startIdx;
for( Function::iterator i = function.begin(); i != function.end(); i++ )
{
// Add basic block with index.
indexBBMap[ &*i ] = idx++;
}
// Return the index of the first basic block that is an entry to the
function.
return startIdx;
}
BasicBlock* MakeDispatcherPass::CreateNewEntryBlock( const std::string&
name, BasicBlock *oldEntryBB )
{
// Create a new basic block and insert it before old entry block.
BasicBlock* newEntryBB = new BasicBlock( name, oldEntryBB->getParent(),
oldEntryBB );
// Move all allocation instructions for stack variables from the old
entry block to the new one.
// These instructions should be placed at the beginning of the function
and should dominate
// all their uses.
for( BasicBlock::iterator i = oldEntryBB->begin(); i !=
oldEntryBB->end(); )
{
if( AllocationInst* ai = dyn_cast< AllocationInst >( i ) )
{
oldEntryBB->getInstList().remove( i++ ); // Trick to
preserve the valid iterator after removing an element.
newEntryBB->getInstList().push_back( ai );
}
else
{
i++;
}
}
oldEntryBB->setName( "old_entry" );
return newEntryBB;
}
void MakeDispatcherPass::LinkBasicBlockToDispatcher( BasicBlock* basicBlock,
BasicBlock* dispatcherBlock, Value* statePtr, BBindexBBMap& indexBBMap )
{
// Get a reference to the basic block terminator. We should replace it
// by jump to the dispatcher's basic block.
TerminatorInst* terminator = basicBlock->getTerminator();
assert( terminator && "Basic block is not well formed and has no
terminator!" );
if( isa< ReturnInst >( terminator ) )
{
// This block is exit from the function. We will not change it.
return;
}
else if( isa< BranchInst >( terminator ) )
{
// This block is terminating by branch instruction. We will replace
target
// of the branch by the dispatcher block and set current state
variable to
// a proper basic block's index.
BranchInst* branchInst = dynamic_cast< BranchInst* >( terminator );
// Add instruction selecting a proper basic block's index.
Value* stateValue = 0;
if( !branchInst->isConditional() )
{
stateValue = ConstantUInt::get(
Type::UIntTy,
indexBBMap[ branchInst->getSuccessor( 0 ) ]
);
}
else
{
stateValue = new SelectInst(
branchInst->getCondition(),
ConstantUInt::get( Type::UIntTy, indexBBMap[
branchInst->getSuccessor( 0 ) ] ),
ConstantUInt::get( Type::UIntTy, indexBBMap[
branchInst->getSuccessor( 1 ) ] ),
"select_state",
basicBlock
);
}
// Add instructions for load of the state variable and branch to the
dispatcher.
new StoreInst( stateValue, statePtr, basicBlock );
new BranchInst( dispatcherBlock, basicBlock );
basicBlock->getInstList().erase( branchInst );
}
else
{
// TODO: Support other possible terminators.
throw std::exception( ( PassName + ": Unsupported basic block
terminator: " + typeid( terminator ).name() ).c_str() );
}
}
void MakeDispatcherPass::MovePHINodesToDispatcher( BBindexBBMap& indexBBMap,
BasicBlock* dispatcherBB )
{
// Iterate through the list of indexed basic blocks and move all PHIs
nodes
// to the dispatcher's basic block (target basic block) that is new
predecessor
// for all them.
for( BBindexBBMap::iterator idxBBPair = indexBBMap.begin(); idxBBPair !=
indexBBMap.end(); idxBBPair++ )
{
// Iterate through the PHIs nodes that can be located only at the
// beginning of the basic block.
PHINode* phi = 0;
for( BasicBlock::iterator inst = idxBBPair->first->begin(); ( phi =
dyn_cast< PHINode >( inst ) ) != 0; )
{
// Move the PHI node to the target basic block.
phi->getParent()->getInstList().remove( inst++ );
dispatcherBB->getInstList().insert(
dispatcherBB->getInstList().begin(), phi );
// Since a PHI node must have an incoming value for every
predecessor of
// the parent basic block we should add all predecessors of the
target
// basic block that have no corresponding incoming value in the
PHI node
// as new incoming undefined value to the PHI node.
int wasIncomingCount = 0;
for( pred_iterator pred = pred_begin( dispatcherBB ); pred !=
pred_end( dispatcherBB ); pred++ )
{
int idx = phi->getBasicBlockIndex( *pred );
if( idx == -1 )
{
phi->addIncoming( UndefValue::get( phi->getType() ),
*pred );
}
else
{
wasIncomingCount++;
}
}
assert( wasIncomingCount && "No one of the predecessors was not
incoming value to the PHI!" );
}
}
}
void MakeDispatcherPass::ConvertSwitch( Function& function )
{
BasicBlock* entryBB = &function.getEntryBlock();
for( Function::iterator i = function.begin(); i != function.end(); i++ )
{
BasicBlock* basicBlock = &*i;
TerminatorInst* terminator = basicBlock->getTerminator();
assert( terminator && "Basic block is not well formed and has no
terminator!" );
if( isa< SwitchInst >( terminator ) )
{
SwitchInst* switchInst = dynamic_cast< SwitchInst * >(
basicBlock->getTerminator() );
// Allocate a local variable and add an instruction saving the
value
// being tested by the switch instruction to this variable.
//
// Allocating of the new variable is necessary because initial
order
// of the basic blocks will be changed by dispatcher embedding.
AllocaInst* testValuePtr = new AllocaInst(
switchInst->getCondition()->getType(), 0, "switch_test_val",
entryBB->getTerminator() );
new StoreInst( switchInst->getCondition(), testValuePtr, false,
switchInst );
// Replace the switch instruction by basic blocks with
conditional branches.
BasicBlock* caseBlock = ProcessCase( switchInst, 1, basicBlock,
testValuePtr );
switchInst->eraseFromParent();
new BranchInst( caseBlock, basicBlock );
}
}
}
BasicBlock* MakeDispatcherPass::ProcessCase( SwitchInst* switchInst, int
caseIdx, BasicBlock* prevBB, Value* testValuePtr )
{
// Create a new basic block for instructions checking the current case
of the switch instruction.
BasicBlock* caseBlock = new BasicBlock( "case", prevBB->getParent(),
prevBB->getNext() );
// If not all cases was handled ...
if( caseIdx != switchInst->getNumCases() )
{
// Instructions for checking the case.
LoadInst* testValue = new LoadInst( testValuePtr,
"test_value", caseBlock );
SetCondInst* setCondInst = new SetCondInst( Instruction::SetEQ,
testValue, switchInst->getCaseValue( caseIdx ), "is_case", caseBlock );
// Get a reference to the basic block for the next case.
BasicBlock* nextCaseBlock = ProcessCase( switchInst, caseIdx + 1,
caseBlock, testValuePtr );
// Add an instruction for branching to the basic block
// corresponding to the condition of this case or to
// the basic block checking the next case.
BranchInst* branchInst = new BranchInst( switchInst->getSuccessor(
caseIdx ), nextCaseBlock, setCondInst, caseBlock );
}
else if( switchInst->getDefaultDest() != 0 )
{
BranchInst* branchInst = new BranchInst(
switchInst->getDefaultDest(), caseBlock );
}
return caseBlock;
}
void MakeDispatcherPass::ReduceTempVarsLifetime( Function& function )
{
BasicBlock* entryBB = &function.getEntryBlock();
typedef std::vector< Instruction * > InstList;
// Collect all instructions (temporary variables) that have result used
outside
// their parent basic block. Skip the AllocInst instructions because it
will be
// handled separately during creation of new entry basic block.
InstList insts;
for( inst_iterator i = inst_begin( function ); i != inst_end( function
); i++ )
{
Instruction* inst = &*i;
if( IsUsedOutsideParentBlock( inst ) && !isa< AllocaInst >( inst ) )
{
insts.push_back( inst );
}
}
// Walk through the list of temporary variables to reduce their lifetime
// to its parent basic block.
for( InstList::iterator i = insts.begin(); i != insts.end(); i++ )
{
Instruction* inst = *i;
// Collect all instructions that are using the temporary variable.
InstList users;
for( Value::use_iterator j = inst->use_begin(); j !=
inst->use_end(); j++ )
{
users.push_back( dynamic_cast< Instruction* >( *j ) );
}
// Allocate local variable on the function's stack frame and save
there
// the temporary variable (result of the instruction).
AllocaInst* valuePtr = new AllocaInst( inst->getType(), 0,
"reduced_var", entryBB->getTerminator() );
new StoreInst( inst, valuePtr, false, inst->getNext() );
// Patch users of the temporary variable to use the local function
variable.
for( InstList::iterator j = users.begin(); j != users.end(); j++ )
{
Instruction* user = dynamic_cast< Instruction* >( *j );
if( isa< PHINode >( user ) )
{
// For the phi node we should look through every incoming
value and,
// if it is necessary, add to the preceding basic block an
instruction
// for loading the saved temporary variable's value.
PHINode* phiNode = dynamic_cast< PHINode* >( user );
for( unsigned idx = 0; idx <
phiNode->getNumIncomingValues(); idx++ )
{
if( phiNode->getIncomingValue( idx ) == inst )
{
LoadInst* loadedValue = new LoadInst( valuePtr,
"value_of_reduced_var", phiNode->getIncomingBlock( idx )->getTerminator() );
phiNode->setIncomingValue( idx, loadedValue );
}
}
}
else
{
user->replaceUsesOfWith( inst, new LoadInst( valuePtr,
"value_of_reduced_var", user ) );
}
}
}
}
bool MakeDispatcherPass::IsUsedOutsideParentBlock( Instruction* value )
{
for( Value::use_iterator i = value->use_begin(); i != value->use_end();
i++ )
{
Instruction* user = dynamic_cast< Instruction* >( *i );
if( user->getParent() != value->getParent() )
{
return true;
}
}
return false;
}
---------------------------------------------
Main.cpp---------------------------------------------
#include <iostream>
#include <fstream>
#include "llvm/Pass.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Bytecode/Reader.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Bytecode/WriteBytecodePass.h"
#include "MakeDispatcherPass.h"
using namespace llvm;
static cl::opt< std::string >
InputFilename( cl::Positional, cl::desc( "<input .bc file>" ), cl::Required
);
static cl::opt< std::string >
OutputFilename( cl::Positional, cl::desc( "<output .bc file>" ),
cl::Required );
static cl::opt< bool >
NoVerify( "disable-verify", cl::desc( "Do not verify result module" ),
cl::Optional );
int main( int argc, char* argv[] )
{
std::string programName = argv[ 0 ] = "obfuscator";
try
{
cl::ParseCommandLineOptions( argc, argv );
std::string errorMessage;
// Load the input module.
std::auto_ptr< Module > M( ParseBytecodeFile( InputFilename,
&errorMessage ) );
if( M.get() == 0 )
{
std::cerr << programName << ": " << errorMessage << "\n";
return 1;
}
// Open the stream we are supposed to write to.
std::ostream *Out = new std::ofstream( OutputFilename.c_str(),
std::ios::out | std::ios::trunc | std::ios::binary );
if( !Out->good() )
{
std::cerr << programName << ": Error opening " << OutputFilename
<< "!\n";
return 1;
}
// Create a PassManager to hold and optimize the collection of
passes we are about to build...
PassManager Passes;
// Add an appropriate TargetData instance for this module.
Passes.add( new TargetData( "obfuscator", M.get() ) );
Passes.add( new MakeDispatcherPass() );
if( !NoVerify )
{
Passes.add( createVerifierPass() );
}
Passes.add( new WriteBytecodePass( Out, true ) );
// Now that we have all of the passes ready, run them.
Passes.run( *M.get() );
}
catch( std::exception e )
{
std::cerr << programName << ": " << e.what() << "\n";
return 1;
}
catch( const std::string& msg )
{
std::cerr << programName << ": " << msg << "\n";
return 1;
}
catch( ... )
{
std::cerr << programName << ": Unexpected exception occurred.\n";
return 1;
}
return 0;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20060517/0d482cb3/attachment.html>
More information about the llvm-dev
mailing list