[Lldb-commits] [lldb] r119582 - in /lldb/trunk: include/lldb/Expression/IRForTarget.h source/Expression/IRForTarget.cpp

Sean Callanan scallanan at apple.com
Wed Nov 17 15:00:36 PST 2010


Author: spyffe
Date: Wed Nov 17 17:00:36 2010
New Revision: 119582

URL: http://llvm.org/viewvc/llvm-project?rev=119582&view=rev
Log:
Added support for constant strings of the form @"this-is-a-string".
They are replaced with calls to the CoreFoundation function 
CFStringCreateWithBytes() by a portion of the IRForTarget pass.

Modified:
    lldb/trunk/include/lldb/Expression/IRForTarget.h
    lldb/trunk/source/Expression/IRForTarget.cpp

Modified: lldb/trunk/include/lldb/Expression/IRForTarget.h
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/include/lldb/Expression/IRForTarget.h?rev=119582&r1=119581&r2=119582&view=diff
==============================================================================
--- lldb/trunk/include/lldb/Expression/IRForTarget.h (original)
+++ lldb/trunk/include/lldb/Expression/IRForTarget.h Wed Nov 17 17:00:36 2010
@@ -17,6 +17,7 @@
     class CallInst;
     class Constant;
     class Function;
+    class GlobalVariable;
     class Instruction;
     class Module;
     class Value;
@@ -112,6 +113,54 @@
     //------------------------------------------------------------------
     bool createResultVariable(llvm::Module &M,
                               llvm::Function &F);
+    
+    //------------------------------------------------------------------
+    /// A function-level pass to find Objective-C constant strings and
+    /// transform them to calls to CFStringCreateWithBytes.
+    //------------------------------------------------------------------
+
+    //------------------------------------------------------------------
+    /// Rewrite a single Objective-C constant string.
+    ///
+    /// @param[in] M
+    ///     The module currently being processed.
+    ///
+    /// @param[in] NSStr
+    ///     The constant NSString to be transformed
+    ///
+    /// @param[in] CStr
+    ///     The constant C string inside the NSString.  This will be
+    ///     passed as the bytes argument to CFStringCreateWithBytes.
+    ///
+    /// @param[in] FirstEntryInstruction
+    ///     An instruction early in the execution of the function.
+    ///     When this function synthesizes a call to 
+    ///     CFStringCreateWithBytes, it places the call before this
+    ///     instruction.  The instruction should come before all 
+    ///     uses of the NSString.
+    ///
+    /// @return
+    ///     True on success; false otherwise
+    //------------------------------------------------------------------
+    bool rewriteObjCConstString(llvm::Module &M,
+                                llvm::GlobalVariable *NSStr,
+                                llvm::GlobalVariable *CStr,
+                                llvm::Instruction *FirstEntryInstruction);    
+    
+    //------------------------------------------------------------------
+    /// The top-level pass implementation
+    ///
+    /// @param[in] M
+    ///     The module currently being processed.
+    ///
+    /// @param[in] F
+    ///     The function currently being processed.
+    ///
+    /// @return
+    ///     True on success; false otherwise
+    //------------------------------------------------------------------
+    bool rewriteObjCConstStrings(llvm::Module &M,
+                                 llvm::Function &F);
 
     //------------------------------------------------------------------
     /// A basic block-level pass to find all Objective-C method calls and
@@ -323,11 +372,40 @@
                           llvm::Function &F);
     
     /// Flags
-    bool                                    m_resolve_vars;         ///< True if external variable references and persistent variable references should be resolved
+    bool                                    m_resolve_vars;             ///< True if external variable references and persistent variable references should be resolved
+    
+    std::string                             m_func_name;                ///< The name of the function to translate
+    lldb_private::ClangExpressionDeclMap   *m_decl_map;                 ///< The DeclMap containing the Decls 
+    llvm::Constant                         *m_CFStringCreateWithBytes;  ///< The address of the function CFStringCreateWithBytes, cast to the appropriate function pointer type
+    llvm::Constant                         *m_sel_registerName;         ///< The address of the function sel_registerName, cast to the appropriate function pointer type
     
-    std::string                             m_func_name;            ///< The name of the function to translate
-    lldb_private::ClangExpressionDeclMap   *m_decl_map;             ///< The DeclMap containing the Decls 
-    llvm::Constant                         *m_sel_registerName;     ///< The address of the function sel_registerName, cast to the appropriate function pointer type
+private:
+    //------------------------------------------------------------------
+    /// UnfoldConstant operates on a constant [Old] which has just been 
+    /// replaced with a value [New].  We assume that New has
+    /// been properly placed early in the function, in front of the 
+    /// first instruction in the entry basic block 
+    /// [FirstEntryInstruction].  
+    ///
+    /// UnfoldConstant reads through the uses of Old and replaces Old 
+    /// in those uses with New.  Where those uses are constants, the 
+    /// function generates new instructions to compute the result of the 
+    /// new, non-constant expression and places them before 
+    /// FirstEntryInstruction.  These instructions replace the constant
+    /// uses, so UnfoldConstant calls itself recursively for those.
+    ///
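+    /// @param[in] Old
+    ///     The constant whose uses are to be rewritten.
+    /// @param[in] New
+    ///     The value that replaces Old in those uses.
+    /// @param[in] FirstEntryInstruction
+    ///     The instruction before which new instructions are placed.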
+    /// @return
+    ///     True on success; false otherwise
+    //------------------------------------------------------------------
+    static bool UnfoldConstant(llvm::Constant *Old, 
+                               llvm::Value *New, 
+                               llvm::Instruction *FirstEntryInstruction);
 };
 
 #endif

Modified: lldb/trunk/source/Expression/IRForTarget.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Expression/IRForTarget.cpp?rev=119582&r1=119581&r2=119582&view=diff
==============================================================================
--- lldb/trunk/source/Expression/IRForTarget.cpp (original)
+++ lldb/trunk/source/Expression/IRForTarget.cpp Wed Nov 17 17:00:36 2010
@@ -37,6 +37,7 @@
                          const char *func_name) :
     ModulePass(ID),
     m_decl_map(decl_map),
+    m_CFStringCreateWithBytes(NULL),
     m_sel_registerName(NULL),
     m_func_name(func_name),
     m_resolve_vars(resolve_vars)
@@ -256,6 +257,295 @@
     return true;
 }
 
+static void DebugUsers(lldb::LogSP &log, Value *V, uint8_t depth)
+{    
+    if (!depth)
+        return;
+    
+    depth--;
+    
+    log->Printf("  <Begin %d users>", V->getNumUses());
+    
+    for (Value::use_iterator ui = V->use_begin(), ue = V->use_end();
+         ui != ue;
+         ++ui)
+    {
+        log->Printf("  <Use %p> %s", *ui, PrintValue(*ui).c_str());
+        DebugUsers(log, *ui, depth);
+    }
+    
+    log->Printf("  <End uses>");
+}
+
+bool 
+IRForTarget::rewriteObjCConstString(llvm::Module &M,
+                                    llvm::GlobalVariable *NSStr,
+                                    llvm::GlobalVariable *CStr,
+                                    Instruction *FirstEntryInstruction)
+{
+    lldb::LogSP log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_EXPRESSIONS));
+    
+    const Type *i8_ptr_ty = Type::getInt8PtrTy(M.getContext());
+    const IntegerType *intptr_ty = Type::getIntNTy(M.getContext(),
+                                                   (M.getPointerSize() == Module::Pointer64) ? 64 : 32);
+    const Type *i32_ty = Type::getInt32Ty(M.getContext());
+    const Type *i8_ty = Type::getInt8Ty(M.getContext());
+    
+    if (!m_CFStringCreateWithBytes)
+    {
+        lldb::addr_t CFStringCreateWithBytes_addr;
+        
+        static lldb_private::ConstString g_CFStringCreateWithBytes_str ("CFStringCreateWithBytes");
+        
+        if (!m_decl_map->GetFunctionAddress (g_CFStringCreateWithBytes_str, CFStringCreateWithBytes_addr))
+        {
+            if (log)
+                log->PutCString("Couldn't find CFStringCreateWithBytes in the target");
+            
+            return false;
+        }
+            
+        if (log)
+            log->Printf("Found CFStringCreateWithBytes at 0x%llx", CFStringCreateWithBytes_addr);
+        
+        // Build the function type:
+        //
+        // CFStringRef CFStringCreateWithBytes (
+        //   CFAllocatorRef alloc,
+        //   const UInt8 *bytes,
+        //   CFIndex numBytes,
+        //   CFStringEncoding encoding,
+        //   Boolean isExternalRepresentation
+        // );
+        //
+        // We make the following substitutions:
+        //
+        // CFStringRef -> i8*
+        // CFAllocatorRef -> i8*
+        // UInt8 * -> i8*
+        // CFIndex -> long (i32 or i64, as appropriate; we ask the module for its pointer size for now)
+        // CFStringEncoding -> i32
+        // Boolean -> i8
+        
+        std::vector <const Type *> CFSCWB_arg_types;
+        CFSCWB_arg_types.push_back(i8_ptr_ty);
+        CFSCWB_arg_types.push_back(i8_ptr_ty);
+        CFSCWB_arg_types.push_back(intptr_ty);
+        CFSCWB_arg_types.push_back(i32_ty);
+        CFSCWB_arg_types.push_back(i8_ty);
+        llvm::Type *CFSCWB_ty = FunctionType::get(i8_ptr_ty, CFSCWB_arg_types, false);
+        
+        // Build the constant containing the pointer to the function
+        PointerType *CFSCWB_ptr_ty = PointerType::getUnqual(CFSCWB_ty);
+        Constant *CFSCWB_addr_int = ConstantInt::get(intptr_ty, CFStringCreateWithBytes_addr, false);
+        m_CFStringCreateWithBytes = ConstantExpr::getIntToPtr(CFSCWB_addr_int, CFSCWB_ptr_ty);
+    }
+    
+    ConstantArray *string_array = dyn_cast<ConstantArray>(CStr->getInitializer());
+                        
+    SmallVector <Value*, 5> CFSCWB_arguments;
+    
+    Constant *alloc_arg         = Constant::getNullValue(i8_ptr_ty);
+    Constant *bytes_arg         = ConstantExpr::getBitCast(CStr, i8_ptr_ty);
+    Constant *numBytes_arg      = ConstantInt::get(intptr_ty, string_array->getType()->getNumElements(), false);
+    Constant *encoding_arg      = ConstantInt::get(i32_ty, 0x0600, false); /* 0x0600 is kCFStringEncodingASCII */
+    Constant *isExternal_arg    = ConstantInt::get(i8_ty, 0x0, false); /* 0x0 is false */
+    
+    CFSCWB_arguments.push_back(alloc_arg);
+    CFSCWB_arguments.push_back(bytes_arg);
+    CFSCWB_arguments.push_back(numBytes_arg);
+    CFSCWB_arguments.push_back(encoding_arg);
+    CFSCWB_arguments.push_back(isExternal_arg);
+    
+    CallInst *CFSCWB_call = CallInst::Create(m_CFStringCreateWithBytes, 
+                                             CFSCWB_arguments.begin(),
+                                             CFSCWB_arguments.end(),
+                                             "CFStringCreateWithBytes",
+                                             FirstEntryInstruction);
+    
+    Constant *initializer = NSStr->getInitializer();
+        
+    if (!UnfoldConstant(NSStr, CFSCWB_call, FirstEntryInstruction))
+    {
+        if (log)
+            log->PutCString("Couldn't replace the NSString with the result of the call");
+        
+        return false;
+    }
+    
+    NSStr->eraseFromParent();
+    
+    return true;
+}
+
+bool
+IRForTarget::rewriteObjCConstStrings(Module &M,
+                                     Function &F)
+{
+    lldb::LogSP log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_EXPRESSIONS));
+    
+    ValueSymbolTable& value_symbol_table = M.getValueSymbolTable();
+    
+    BasicBlock &entry_block(F.getEntryBlock());
+    Instruction *FirstEntryInstruction(entry_block.getFirstNonPHIOrDbg());
+    
+    if (!FirstEntryInstruction)
+    {
+        if (log)
+            log->PutCString("Couldn't find first instruction for rewritten Objective-C strings");
+        
+        return false;
+    }
+    
+    for (ValueSymbolTable::iterator vi = value_symbol_table.begin(), ve = value_symbol_table.end();
+         vi != ve;
+         ++vi)
+    {
+        if (strstr(vi->first(), "_unnamed_cfstring_"))
+        {
+            Value *nsstring_value = vi->second;
+            
+            GlobalVariable *nsstring_global = dyn_cast<GlobalVariable>(nsstring_value);
+            
+            if (!nsstring_global)
+            {
+                if (log)
+                    log->PutCString("NSString variable is not a GlobalVariable");
+                return false;
+            }
+            
+            if (!nsstring_global->hasInitializer())
+            {
+                if (log)
+                    log->PutCString("NSString variable does not have an initializer");
+                return false;
+            }
+            
+            ConstantStruct *nsstring_struct = dyn_cast<ConstantStruct>(nsstring_global->getInitializer());
+            
+            if (!nsstring_struct)
+            {
+                if (log)
+                    log->PutCString("NSString variable's initializer is not a ConstantStruct");
+                return false;
+            }
+            
+            // We expect the following structure:
+            //
+            // struct {
+            //   int *isa;
+            //   int flags;
+            //   char *str;
+            //   long length;
+            // };
+            
+            if (nsstring_struct->getNumOperands() != 4)
+            {
+                if (log)
+                    log->Printf("NSString variable's initializer structure has an unexpected number of members.  Should be 4, is %d", nsstring_struct->getNumOperands());
+                return false;
+            }
+            
+            Constant *nsstring_member = nsstring_struct->getOperand(2);
+            
+            if (!nsstring_member)
+            {
+                if (log)
+                    log->PutCString("NSString initializer's str element was empty");
+                return false;
+            }
+            
+            ConstantExpr *nsstring_expr = dyn_cast<ConstantExpr>(nsstring_member);
+            
+            if (!nsstring_expr)
+            {
+                if (log)
+                    log->PutCString("NSString initializer's str element is not a ConstantExpr");
+                return false;
+            }
+            
+            if (nsstring_expr->getOpcode() != Instruction::GetElementPtr)
+            {
+                if (log)
+                    log->Printf("NSString initializer's str element is not a GetElementPtr expression, it's a %s", nsstring_expr->getOpcodeName());
+                return false;
+            }
+            
+            Constant *nsstring_cstr = nsstring_expr->getOperand(0);
+            
+            GlobalVariable *cstr_global = dyn_cast<GlobalVariable>(nsstring_cstr);
+            
+            if (!cstr_global)
+            {
+                if (log)
+                    log->PutCString("NSString initializer's str element is not a GlobalVariable");
+                
+                nsstring_cstr->dump();
+                
+                return false;
+            }
+            
+            if (!cstr_global->hasInitializer())
+            {
+                if (log)
+                    log->PutCString("NSString initializer's str element does not have an initializer");
+                return false;
+            }
+            
+            ConstantArray *cstr_array = dyn_cast<ConstantArray>(cstr_global->getInitializer());
+            
+            if (!cstr_array)
+            {
+                if (log)
+                    log->PutCString("NSString initializer's str element is not a ConstantArray");
+                return false;
+            }
+            
+            if (!cstr_array->isCString())
+            {
+                if (log)
+                    log->PutCString("NSString initializer's str element is not a C string array");
+                return false;
+            }
+            
+            if (log)
+                log->Printf("Found NSString constant %s, which contains \"%s\"", vi->first(), cstr_array->getAsString().c_str());
+            
+            if (!rewriteObjCConstString(M, nsstring_global, cstr_global, FirstEntryInstruction))
+            {
+                if (log)
+                    log->PutCString("Error rewriting the constant string");
+                return false;
+            }
+            
+            
+        }
+    }
+    
+    for (ValueSymbolTable::iterator vi = value_symbol_table.begin(), ve = value_symbol_table.end();
+         vi != ve;
+         ++vi)
+    {
+        if (!strcmp(vi->first(), "__CFConstantStringClassReference"))
+        {
+            GlobalVariable *gv = dyn_cast<GlobalVariable>(vi->second);
+            
+            if (!gv)
+            {
+                if (log)
+                    log->PutCString("__CFConstantStringClassReference is not a global variable");
+                return false;
+            }
+                
+            gv->eraseFromParent();
+                
+            break;
+        }
+    }
+    
+    return true;
+}
+
 static bool isObjCSelectorRef(Value *V)
 {
     GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
@@ -366,7 +656,7 @@
     CallInst *srN_call = CallInst::Create(m_sel_registerName, 
                                           srN_arguments.begin(),
                                           srN_arguments.end(),
-                                          "srN",
+                                          "sel_registerName",
                                           selector_load);
     
     // Replace the load with the call in all users
@@ -638,14 +928,14 @@
 
 bool
 IRForTarget::MaybeHandleCallArguments(Module &M,
-                                      CallInst *C)
+                                      CallInst *Old)
 {
     // lldb::LogSP log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_EXPRESSIONS));
     
-    for (unsigned op_index = 0, num_ops = C->getNumArgOperands();
+    for (unsigned op_index = 0, num_ops = Old->getNumArgOperands();
          op_index < num_ops;
          ++op_index)
-        if (!MaybeHandleVariable(M, C->getArgOperand(op_index))) // conservatively believe that this is a store
+        if (!MaybeHandleVariable(M, Old->getArgOperand(op_index))) // conservatively believe that this is a store
             return false;
     
     return true;
@@ -813,9 +1103,9 @@
 
 static bool isGuardVariableRef(Value *V)
 {
-    Constant *C;
+    Constant *Old;
     
-    if (!(C = dyn_cast<Constant>(V)))
+    if (!(Old = dyn_cast<Constant>(V)))
         return false;
     
     ConstantExpr *CE;
@@ -825,10 +1115,10 @@
         if (CE->getOpcode() != Instruction::BitCast)
             return false;
         
-        C = CE->getOperand(0);
+        Old = CE->getOperand(0);
     }
     
-    GlobalVariable *GV = dyn_cast<GlobalVariable>(C);
+    GlobalVariable *GV = dyn_cast<GlobalVariable>(Old);
     
     if (!GV || !GV->hasName() || !GV->getName().startswith("_ZGV"))
         return false;
@@ -909,19 +1199,8 @@
     return true;
 }
 
-// UnfoldConstant operates on a constant [C] which has just been replaced with a value
-// [new_value].  We assume that new_value has been properly placed early in the function,
-// most likely somewhere in front of the first instruction in the entry basic block 
-// [first_entry_instruction].  
-//
-// UnfoldConstant reads through the uses of C and replaces C in those uses with new_value.
-// Where those uses are constants, the function generates new instructions to compute the
-// result of the new, non-constant expression and places them before first_entry_instruction.  
-// These instructions replace the constant uses, so UnfoldConstant calls itself recursively
-// for those.
-
-static bool
-UnfoldConstant(Constant *C, Value *new_value, Instruction *first_entry_instruction)
+bool
+IRForTarget::UnfoldConstant(Constant *Old, Value *New, Instruction *FirstEntryInstruction)
 {
     lldb::LogSP log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_EXPRESSIONS));
 
@@ -931,8 +1210,8 @@
     
     // We do this because the use list might change, invalidating our iterator.
     // Much better to keep a work list ourselves.
-    for (ui = C->use_begin();
-         ui != C->use_end();
+    for (ui = Old->use_begin();
+         ui != Old->use_end();
          ++ui)
         users.push_back(*ui);
         
@@ -961,12 +1240,12 @@
                         
                         Value *s = constant_expr->getOperand(0);
                         
-                        if (s == C)
-                            s = new_value;
+                        if (s == Old)
+                            s = New;
                         
-                        BitCastInst *bit_cast(new BitCastInst(s, C->getType(), "", first_entry_instruction));
+                        BitCastInst *bit_cast(new BitCastInst(s, Old->getType(), "", FirstEntryInstruction));
                         
-                        UnfoldConstant(constant_expr, bit_cast, first_entry_instruction);
+                        UnfoldConstant(constant_expr, bit_cast, FirstEntryInstruction);
                     }
                     break;
                 case Instruction::GetElementPtr:
@@ -977,8 +1256,8 @@
                         
                         Value *ptr = constant_expr->getOperand(0);
                         
-                        if (ptr == C)
-                            ptr = new_value;
+                        if (ptr == Old)
+                            ptr = New;
                         
                         SmallVector<Value*, 16> indices;
                         
@@ -991,15 +1270,15 @@
                         {
                             Value *operand = constant_expr->getOperand(operand_index);
                             
-                            if (operand == C)
-                                operand = new_value;
+                            if (operand == Old)
+                                operand = New;
                             
                             indices.push_back(operand);
                         }
                         
-                        GetElementPtrInst *get_element_ptr(GetElementPtrInst::Create(ptr, indices.begin(), indices.end(), "", first_entry_instruction));
+                        GetElementPtrInst *get_element_ptr(GetElementPtrInst::Create(ptr, indices.begin(), indices.end(), "", FirstEntryInstruction));
                         
-                        UnfoldConstant(constant_expr, get_element_ptr, first_entry_instruction);
+                        UnfoldConstant(constant_expr, get_element_ptr, FirstEntryInstruction);
                     }
                     break;
                 }
@@ -1014,7 +1293,7 @@
         else
         {
             // simple fall-through case for non-constants
-            user->replaceUsesOfWith(C, new_value);
+            user->replaceUsesOfWith(Old, New);
         }
     }
     
@@ -1067,9 +1346,9 @@
         log->Printf("Arg: \"%s\"", PrintValue(argument).c_str());
     
     BasicBlock &entry_block(F.getEntryBlock());
-    Instruction *first_entry_instruction(entry_block.getFirstNonPHIOrDbg());
+    Instruction *FirstEntryInstruction(entry_block.getFirstNonPHIOrDbg());
     
-    if (!first_entry_instruction)
+    if (!FirstEntryInstruction)
         return false;
     
     LLVMContext &context(M.getContext());
@@ -1096,11 +1375,11 @@
                         offset);
         
         ConstantInt *offset_int(ConstantInt::getSigned(offset_type, offset));
-        GetElementPtrInst *get_element_ptr = GetElementPtrInst::Create(argument, offset_int, "", first_entry_instruction);
-        BitCastInst *bit_cast = new BitCastInst(get_element_ptr, value->getType(), "", first_entry_instruction);
+        GetElementPtrInst *get_element_ptr = GetElementPtrInst::Create(argument, offset_int, "", FirstEntryInstruction);
+        BitCastInst *bit_cast = new BitCastInst(get_element_ptr, value->getType(), "", FirstEntryInstruction);
         
         if (Constant *constant = dyn_cast<Constant>(value))
-            UnfoldConstant(constant, bit_cast, first_entry_instruction);
+            UnfoldConstant(constant, bit_cast, FirstEntryInstruction);
         else
             value->replaceAllUsesWith(bit_cast);
         
@@ -1138,6 +1417,37 @@
     if (!createResultVariable(M, *function))
         return false;
     
+    ///////////////////////////////////////////////////////////////////////////////
+    // Fix all Objective-C constant strings to use CFStringCreateWithBytes
+    //
+    
+    if (log)
+    {
+        std::string s;
+        raw_string_ostream oss(s);
+        
+        M.print(oss, NULL);
+        
+        oss.flush();
+        
+        log->Printf("Module after creating the result variable: \n\"%s\"", s.c_str());
+    }
+    
+    if (!rewriteObjCConstStrings(M, *function))
+        return false;
+    
+    if (log)
+    {
+        std::string s;
+        raw_string_ostream oss(s);
+        
+        M.print(oss, NULL);
+        
+        oss.flush();
+        
+        log->Printf("Module after rewriting Objective-C const strings: \n\"%s\"", s.c_str());
+    }
+    
     //////////////////////////////////
     // Run basic-block level passes
     //





More information about the lldb-commits mailing list