[LLVMdev] Obfuscation with LLVM

Evgeny Podlepaev evgeny4u at gmail.com
Wed May 17 15:03:46 PDT 2006


Hi all,

I was trying to implement an obfuscation tool for C-code on the basis of
LLVM.  I got a prototype of the simple obfuscation transformation which
converting control flow graph to something like a state machine. I am not
sure I will have time to work on extending further this tool with new
transformations like opaque predicates and decided to put here source code I
have by now with hope that it can be helpful to somebody. I am newbie with
LLVM and am sure there are a lot of bugs in current implementation, but I
have tested it with complex enough C-program and in my case it worked.  Sorry
for the poor English and grammar.

Regards,
Evgeny Podlepaev
---------------------------------------------
MakeDispatcherPass.h---------------------------------------------

#include <map>
#include <string>

#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"

using namespace llvm;

class MakeDispatcherPass : public FunctionPass
{
public:
    virtual bool runOnFunction( Function& currFunction );

private:
    typedef std::map< BasicBlock*, unsigned > BBindexBBMap;

    static unsigned IndexSourceBasicBlocks( Function& function,
BBindexBBMap& indexBBMap );

    static BasicBlock* CreateNewEntryBlock( const std::string& name,
BasicBlock *oldEntryBB );

    static void LinkBasicBlockToDispatcher( BasicBlock* basicBlock,
BasicBlock* dispatcherBlock, Value* statePtr, BBindexBBMap& indexBBMap );

    static void MovePHINodesToDispatcher( BBindexBBMap& indexBBMap,
BasicBlock* dispatcherBB );

    static void ConvertSwitch( Function& function );

    static BasicBlock* ProcessCase( SwitchInst* switchInst, int caseIdx,
BasicBlock* prevBB, Value* testValuePtr );

    static void ReduceTempVarsLifetime( Function& function );

    static bool IsUsedOutsideParentBlock( Instruction* value );

    static const std::string PassName;
};

---------------------------------------------
MakeDispatcherPass.cpp---------------------------------------------

#include <map>
#include <vector>
#include <cassert>
#include <iostream>
#include <exception>

#include "llvm/Type.h"
#include "llvm/Module.h"
#include "llvm/Function.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/InstIterator.h"

#include "MakeDispatcherPass.h"

//
// Static variables.
//

const std::string MakeDispatcherPass::PassName = "MakeDispatcherPass";

//
// Public methods.
//

bool MakeDispatcherPass::runOnFunction( Function& currFunction )
{
    DEBUG( std::cerr << PassName << ": Processing " << currFunction.getName()
<< " ...\n" );
    bool madeChange = true;

    // Because of changes which we will make in control flow graph it is
necessary
    // to get rid of temporary variables used outside their parent basic
block.
    ReduceTempVarsLifetime( currFunction );

    // Replace all SwitchInst instructions with chained branch instructions
    // in order to simplify the dispatcher embedding.
    //
    // NOTE: We can use LowerSwitch Pass instead of our implementation.
    ConvertSwitch( currFunction );

    // Keep a reference to the old entry basic block.
    BasicBlock* oldEntryBB = &currFunction.getEntryBlock();

    // Collect basic blocks to be reordered and assign unique index for
each.
    BBindexBBMap indexBBMap;
    unsigned firstBBIndex = IndexSourceBasicBlocks( currFunction, indexBBMap
);

    // Create basic blocks for initialization and dispatching.
    BasicBlock* disInitBB = CreateNewEntryBlock( "dispatcher_init",
oldEntryBB );
    BasicBlock* disMainBB = new BasicBlock(    "dispatcher_main",
&currFunction, oldEntryBB );

    // Add instructions that allocate and initialize the dispatcher's state
variable
    // to the initialization block.
    AllocaInst* statePtr = new AllocaInst( Type::UIntTy, 0, "state",
disInitBB );
    new StoreInst( ConstantUInt::get( Type::UIntTy, firstBBIndex ),
statePtr, disInitBB );
    new BranchInst( disMainBB, disInitBB );    // Connection to the
dispatcher's main block.

    // Add instructions to the dispatcher's main block that implement
switching
    // between the basic blocks depending on the state variable's value.
    LoadInst* loadState = new LoadInst( statePtr, "next", disMainBB );
    SwitchInst* switchInst = new SwitchInst( loadState, oldEntryBB,
unsigned( indexBBMap.size() ), disMainBB );
    for( BBindexBBMap::iterator idxBBPair = indexBBMap.begin(); idxBBPair !=
indexBBMap.end(); idxBBPair++ )
    {
        switchInst->addCase( ConstantUInt::get( Type::UIntTy,
idxBBPair->second ), idxBBPair->first );
    }

    // Unlink all source basic blocks and relink them to the dispatcher's
main block.
    for( BBindexBBMap::iterator idxBBPair = indexBBMap.begin(); idxBBPair !=
indexBBMap.end(); idxBBPair++ )
    {
        LinkBasicBlockToDispatcher( idxBBPair->first, disMainBB, statePtr,
indexBBMap );
    }

    // Since the predecessors of source basic blocks was replaced by the
dispatcher's
    // basic block it is necessary to update all PHI nodes in source blocks.
PHI is a
    // special artificial definition for supporting the Static Single
Assignment form
    // used for data flow analysis (
http://gcc.gnu.org/onlinedocs/gccint/SSA.html).
    MovePHINodesToDispatcher( indexBBMap, disMainBB );

    return madeChange;
}

//
// Private methods.
//

unsigned MakeDispatcherPass::IndexSourceBasicBlocks( Function& function,
BBindexBBMap& indexBBMap )
{
    unsigned startIdx = 0;

    // Keep all source basic blocks, including entry and exit blocks.
    unsigned idx = startIdx;
    for( Function::iterator i = function.begin(); i != function.end(); i++ )
    {
        // Add basic block with index.
        indexBBMap[ &*i ] = idx++;
    }

    // Return the index of the first basic block that is an entry to the
function.
    return startIdx;
}

BasicBlock* MakeDispatcherPass::CreateNewEntryBlock( const std::string&
name, BasicBlock *oldEntryBB )
{
    // Create a new basic block and insert it before old entry block.
    BasicBlock* newEntryBB = new BasicBlock( name, oldEntryBB->getParent(),
oldEntryBB );

    // Move all allocation instructions for stack variables from the old
entry block to the new one.
    // These instructions should be placed at the beginning of the function
and should dominate
    // all their uses.
    for( BasicBlock::iterator i = oldEntryBB->begin(); i !=
oldEntryBB->end(); )
    {
        if( AllocationInst* ai = dyn_cast< AllocationInst >( i ) )
        {
            oldEntryBB->getInstList().remove( i++ );        // Trick to
preserve the valid iterator after removing an element.
            newEntryBB->getInstList().push_back( ai );
        }
        else
        {
            i++;
        }
    }

    oldEntryBB->setName( "old_entry" );
    return newEntryBB;
}

void MakeDispatcherPass::LinkBasicBlockToDispatcher( BasicBlock* basicBlock,
BasicBlock* dispatcherBlock, Value* statePtr, BBindexBBMap& indexBBMap )
{
    // Get a reference to the basic block terminator. We should replace it
    // by jump to the dispatcher's basic block.
    TerminatorInst* terminator = basicBlock->getTerminator();
    assert( terminator && "Basic block is not well formed and has no
terminator!" );

    if( isa< ReturnInst >( terminator ) )
    {
        // This block is exit from the function. We will not change it.
        return;
    }
    else if( isa< BranchInst >( terminator ) )
    {
        // This block is terminating by branch instruction. We will replace
target
        // of the branch by the dispatcher block and set current state
variable to
        // a proper basic block's index.
        BranchInst* branchInst = dynamic_cast< BranchInst* >( terminator );

        // Add instruction selecting a proper basic block's index.
        Value* stateValue = 0;
        if( !branchInst->isConditional() )
        {
            stateValue = ConstantUInt::get(
                    Type::UIntTy,
                    indexBBMap[ branchInst->getSuccessor( 0 ) ]
                );
        }
        else
        {
            stateValue = new SelectInst(
                    branchInst->getCondition(),
                    ConstantUInt::get( Type::UIntTy, indexBBMap[
branchInst->getSuccessor( 0 ) ] ),
                    ConstantUInt::get( Type::UIntTy, indexBBMap[
branchInst->getSuccessor( 1 ) ] ),
                    "select_state",
                    basicBlock
                );
        }

        // Add instructions for load of the state variable and branch to the
dispatcher.
        new StoreInst( stateValue, statePtr, basicBlock );
        new BranchInst( dispatcherBlock, basicBlock );
        basicBlock->getInstList().erase( branchInst );
    }
    else
    {
        // TODO: Support other possible terminators.
        throw std::exception( ( PassName + ": Unsupported basic block
terminator: " + typeid( terminator ).name() ).c_str() );
    }
}

void MakeDispatcherPass::MovePHINodesToDispatcher( BBindexBBMap& indexBBMap,
BasicBlock* dispatcherBB )
{
    // Iterate through the list of indexed basic blocks and move all PHIs
nodes
    // to the dispatcher's basic block (target basic block) that is new
predecessor
    // for all them.
    for( BBindexBBMap::iterator idxBBPair = indexBBMap.begin(); idxBBPair !=
indexBBMap.end(); idxBBPair++ )
    {
        // Iterate through the PHIs nodes that can be located only at the
        // beginning of the basic block.
        PHINode* phi = 0;
        for( BasicBlock::iterator inst = idxBBPair->first->begin(); ( phi =
dyn_cast< PHINode >( inst ) ) != 0; )
        {
            // Move the PHI node to the target basic block.
            phi->getParent()->getInstList().remove( inst++ );
            dispatcherBB->getInstList().insert(
dispatcherBB->getInstList().begin(), phi );

            // Since a PHI node must have an incoming value for every
predecessor of
            // the parent basic block we should add all predecessors of the
target
            // basic block that have no corresponding incoming value in the
PHI node
            // as new incoming undefined value to the PHI node.
            int wasIncomingCount = 0;
            for( pred_iterator pred = pred_begin( dispatcherBB ); pred !=
pred_end( dispatcherBB ); pred++ )
            {
                int idx = phi->getBasicBlockIndex( *pred );
                if( idx == -1 )
                {
                    phi->addIncoming( UndefValue::get( phi->getType() ),
*pred );
                }
                else
                {
                    wasIncomingCount++;
                }
            }
            assert( wasIncomingCount && "No one of the predecessors was not
incoming value to the PHI!" );
        }
    }
}

void MakeDispatcherPass::ConvertSwitch( Function& function )
{
    BasicBlock* entryBB = &function.getEntryBlock();

    for( Function::iterator i = function.begin(); i != function.end(); i++ )
    {
        BasicBlock* basicBlock = &*i;

        TerminatorInst* terminator = basicBlock->getTerminator();
        assert( terminator && "Basic block is not well formed and has no
terminator!" );

        if( isa< SwitchInst >( terminator ) )
        {
            SwitchInst* switchInst = dynamic_cast< SwitchInst * >(
basicBlock->getTerminator() );

            // Allocate a local variable and add an instruction saving the
value
            // being tested by the switch instruction to this variable.
            //
            // Allocating of the new variable is necessary because initial
order
            // of the basic blocks will be changed by dispatcher embedding.
            AllocaInst* testValuePtr = new AllocaInst(
switchInst->getCondition()->getType(), 0, "switch_test_val",
entryBB->getTerminator() );
            new StoreInst( switchInst->getCondition(), testValuePtr, false,
switchInst );

            // Replace the switch instruction by basic blocks with
conditional branches.
            BasicBlock* caseBlock = ProcessCase( switchInst, 1, basicBlock,
testValuePtr );
            switchInst->eraseFromParent();

            new BranchInst( caseBlock, basicBlock );
        }
    }
}

BasicBlock* MakeDispatcherPass::ProcessCase( SwitchInst* switchInst, int
caseIdx, BasicBlock* prevBB, Value* testValuePtr )
{
    // Create a new basic block for instructions checking the current case
of the switch instruction.
    BasicBlock* caseBlock = new BasicBlock( "case", prevBB->getParent(),
prevBB->getNext() );

    // If not all cases was handled ...
    if( caseIdx != switchInst->getNumCases() )
    {
        // Instructions for checking the case.
        LoadInst* testValue            = new LoadInst( testValuePtr,
"test_value", caseBlock );
        SetCondInst* setCondInst    = new SetCondInst( Instruction::SetEQ,
testValue, switchInst->getCaseValue( caseIdx ), "is_case", caseBlock );

        // Get a reference to the basic block for the next case.
        BasicBlock* nextCaseBlock = ProcessCase( switchInst, caseIdx + 1,
caseBlock, testValuePtr );

        // Add an instruction for branching to the basic block
        // corresponding to the condition of this case or to
        // the basic block checking the next case.
        BranchInst* branchInst = new BranchInst( switchInst->getSuccessor(
caseIdx ), nextCaseBlock, setCondInst, caseBlock );
    }
    else if( switchInst->getDefaultDest() != 0 )
    {
        BranchInst* branchInst = new BranchInst(
switchInst->getDefaultDest(), caseBlock );
    }

    return caseBlock;
}

void MakeDispatcherPass::ReduceTempVarsLifetime( Function& function )
{
    BasicBlock* entryBB = &function.getEntryBlock();

    typedef std::vector< Instruction * > InstList;

    // Collect all instructions (temporary variables) that have result used
outside
    // their parent basic block. Skip the AllocInst instructions because it
will be
    // handled separately during creation of new entry basic block.
    InstList insts;
    for( inst_iterator i = inst_begin( function ); i != inst_end( function
); i++ )
    {
        Instruction* inst = &*i;

        if( IsUsedOutsideParentBlock( inst ) && !isa< AllocaInst >( inst ) )
        {
            insts.push_back( inst );
        }
    }

    // Walk through the list of temporary variables to reduce their lifetime
    // to its parent basic block.
    for( InstList::iterator i = insts.begin(); i != insts.end(); i++ )
    {
        Instruction* inst = *i;

        // Collect all instructions that are using the temporary variable.
        InstList users;
        for( Value::use_iterator j = inst->use_begin(); j !=
inst->use_end(); j++ )
        {
            users.push_back( dynamic_cast< Instruction* >( *j ) );
        }

        // Allocate local variable on the function's stack frame and save
there
        // the temporary variable (result of the instruction).
        AllocaInst* valuePtr = new AllocaInst( inst->getType(), 0,
"reduced_var", entryBB->getTerminator() );
        new StoreInst( inst, valuePtr, false, inst->getNext() );

        // Patch users of the temporary variable to use the local function
variable.
        for( InstList::iterator j = users.begin(); j != users.end(); j++ )
        {
            Instruction* user = dynamic_cast< Instruction* >( *j );

            if( isa< PHINode >( user ) )
            {
                // For the phi node we should look through every incoming
value and,
                // if it is necessary, add to the preceding basic block an
instruction
                // for loading the saved temporary variable's value.
                PHINode* phiNode = dynamic_cast< PHINode* >( user );
                for( unsigned idx = 0; idx <
phiNode->getNumIncomingValues(); idx++ )
                {
                    if( phiNode->getIncomingValue( idx ) == inst )
                    {
                        LoadInst* loadedValue = new LoadInst( valuePtr,
"value_of_reduced_var", phiNode->getIncomingBlock( idx )->getTerminator() );
                        phiNode->setIncomingValue( idx, loadedValue );
                    }
                }
            }
            else
            {
                user->replaceUsesOfWith( inst, new LoadInst( valuePtr,
"value_of_reduced_var", user ) );
            }
        }
    }
}

bool MakeDispatcherPass::IsUsedOutsideParentBlock( Instruction* value )
{
    for( Value::use_iterator i = value->use_begin(); i != value->use_end();
i++ )
    {
        Instruction* user = dynamic_cast< Instruction* >( *i );
        if( user->getParent() != value->getParent() )
        {
            return true;
        }
    }
    return false;
}

---------------------------------------------
Main.cpp---------------------------------------------

#include <iostream>
#include <fstream>

#include "llvm/Pass.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Bytecode/Reader.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Bytecode/WriteBytecodePass.h"

#include "MakeDispatcherPass.h"

using namespace llvm;

static cl::opt< std::string >
InputFilename( cl::Positional, cl::desc( "<input .bc file>" ), cl::Required
);

static cl::opt< std::string >
OutputFilename( cl::Positional, cl::desc( "<output .bc file>" ),
cl::Required );

static cl::opt< bool >
NoVerify( "disable-verify", cl::desc( "Do not verify result module" ),
cl::Optional );

int main( int argc, char* argv[] )
{
    std::string programName = argv[ 0 ] = "obfuscator";

    try
    {
        cl::ParseCommandLineOptions( argc, argv );
        std::string errorMessage;

        // Load the input module.
        std::auto_ptr< Module > M( ParseBytecodeFile( InputFilename,
&errorMessage ) );
        if( M.get() == 0 )
        {
            std::cerr << programName << ": " << errorMessage << "\n";

            return 1;
        }

        // Open the stream we are supposed to write to.
        std::ostream *Out = new std::ofstream( OutputFilename.c_str(),
std::ios::out | std::ios::trunc | std::ios::binary );
        if( !Out->good() )
        {
            std::cerr << programName << ": Error opening " << OutputFilename
<< "!\n";
            return 1;
        }

        // Create a PassManager to hold and optimize the collection of
passes we are about to build...
        PassManager Passes;

        // Add an appropriate TargetData instance for this module.
        Passes.add( new TargetData( "obfuscator", M.get() ) );

        Passes.add( new MakeDispatcherPass() );

        if( !NoVerify )
        {
            Passes.add( createVerifierPass() );
        }
        Passes.add( new WriteBytecodePass( Out, true ) );

        // Now that we have all of the passes ready, run them.
        Passes.run( *M.get() );
    }
    catch( std::exception e )
    {
        std::cerr << programName << ": " << e.what() << "\n";
        return 1;
    }
    catch( const std::string& msg )
    {
        std::cerr << programName << ": " << msg << "\n";
        return 1;
    }
    catch( ... )
    {
        std::cerr << programName << ": Unexpected exception occurred.\n";
        return 1;
    }

    return 0;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20060517/0d482cb3/attachment.html>


More information about the llvm-dev mailing list