[llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp Phases.cpp

Joel Stanley jstanley at cs.uiuc.edu
Sun May 4 14:55:05 PDT 2003


Changes in directory llvm/lib/Reoptimizer/Inst/lib:

ElfReader.cpp updated: 1.8 -> 1.9
Phases.cpp updated: 1.21 -> 1.22

---
Log message:

Bug fixes to the ELF reader, which has also been redesigned slightly to handle archive files properly.



---
Diffs of the changes:

Index: llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp
diff -u llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp:1.8 llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp:1.9
--- llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp:1.8	Wed Apr 30 17:42:36 2003
+++ llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp	Sun May  4 15:01:30 2003
@@ -2,7 +2,9 @@
 // programmer: Joel Stanley
 //       date: Fri Mar 21 16:11:07 CST 2003
 //     fileid: ElfReader.cpp
-//    purpose: Implementation of ELF reader interface.
+//    purpose: Implementation of ELF reader interface as described in ElfReader.h. Note
+//    that special care is taken to ensure that archive files are treated the same as
+//    single object files.
 //
 // TODO:
 //
@@ -14,6 +16,7 @@
 //
 // []
 
+#include <ar.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -22,118 +25,290 @@
 #include <sys/types.h>
 #include <unistd.h>
 #include <iostream>
+#include <iomanip>
 
 #include "llvm/Reoptimizer/Inst/ElfReader.h"
 
 using std::cerr;
 using std::endl;
 
+// Symbol binding values that don't occur in the ELF spec, but that are recognized as WEAK
+// and GLOB by the elfdump command-line utility. For matching output.
+
+#define ELFDUMP_GLOB 0x10
+#define ELFDUMP_WEAK 0x20
+
 const std::string ElfReader::sm_codeSegmentName = ".text";
 
 ElfReader::ElfReader(const char* execName):
-    m_execFD(-1),
-    m_elfDes(0),
-    m_elfHdr(0),
-    m_symTab(0)
-{
-    m_execFD = open(execName, O_RDONLY);
-    assert(m_execFD && "Failed to open executable image");
-
-    // Obtain the ELF descriptor
-    elf_version(EV_CURRENT);
-    m_elfDes = elf_begin(m_execFD, ELF_C_READ, 0);
-    assert(m_elfDes && "Failed to obtain valid ELF descriptor");
+    m_fd(-1),
+    m_pPrimaryElf(0),
+    m_pCurrElf(0),
+    m_currCmd(ELF_C_READ),
+    m_pElfHdr(0),
+    m_pSymHdr(0),
+    m_pSymSec(0),
+    m_pStrTab(0),
+    m_pSecNameTab(0),
+    m_pCurrSym(0),
+    m_pSymEnd(0)
+{
+    m_fd = open(execName, O_RDONLY);
+    assert(m_fd != -1 && "Failed to open executable image");
+
+    if(elf_version(EV_CURRENT) == EV_NONE) {
+        cerr << "ELF library version mismatch" << endl;
+        exit(1);
+    }
+
+    // Obtain the primary ELF descriptor
+    m_pPrimaryElf = elf_begin(m_fd, m_currCmd, 0);
+    assert(m_pPrimaryElf && "Failed to obtain primary ELF descriptor");
+
+    // Advance to the first file in the archive (essentially a no-op if only one file)
+    m_pCurrElf = elf_begin(m_fd, m_currCmd, m_pPrimaryElf);
+
+#if 0
+    if(Elf_Arhdr* arhdr = elf_getarhdr(m_pCurrElf))
+        cerr << "AR member name: " << arhdr->ar_name << endl;
+#endif
+}
+
+ElfReader::~ElfReader()
+{
+    elf_end(m_pPrimaryElf);
+    close(m_fd);
+}
+
+void ElfReader::printSymTableEntry(Elf64_Sym* sym, std::ostream& ostr) 
+{
+    // Print this symbol table entry. Output format is similar to that of the elfdump
+    // command line utility.
+
+    ostr << std::setw(16) << std::hex << std::setfill('0') << sym->st_value << " "
+         << std::setw(16) << std::hex << std::setfill('0') << sym->st_size;
+
+    int type = sym->st_info & 0xf;
+    ostr << " " << std::setw(4);
+    
+    switch(type) {
+        case STT_NOTYPE:  ostr << "NOTY"; break;
+        case STT_OBJECT:  ostr << "OBJT"; break;
+        case STT_FUNC:    ostr << "FUNC"; break;
+        case STT_SECTION: ostr << "SECT"; break;
+
+        case STT_FILE:
+        case STT_COMMON:
+        case STT_TLS:
+        case STT_NUM:
+        case STT_LOPROC:
+        case STT_HIPROC: 
+            ostr << "OTHR"; break;
+            
+        default:
+            //ostr << "(" << type << ")" << endl;
+            assert(0 && "Symbol table entry has unknown type");
+            break;
+    }
+    
+    int binding = sym->st_info & 0xf0;
+    ostr << " " << std::setw(4);
+
+    switch(binding) {
+        case STB_LOCAL:  ostr << "LOCL"; break;
+
+        case ELFDUMP_GLOB:
+        case STB_GLOBAL: ostr << "GLOB"; break;
+
+        case ELFDUMP_WEAK:
+        case STB_WEAK:   ostr << "WEAK"; break;
+
+        case STB_NUM:    ostr << "NUMB"; break;
+
+        case STB_LOPROC:
+        case STB_HIPROC:
+            ostr << "OTHR"; break;
+
+        default:
+            //ostr << "(" << binding << ")" << endl;
+            assert(0 && "Unknown binding type");
+            break;
+    }
+
+    ostr << " " << std::setw(11) << std::setfill(' ');
+
+    if(sym->st_shndx == SHN_UNDEF)
+        ostr << "UNDEF";
+    else if(sym->st_shndx == SHN_ABS)
+        ostr << "ABS";
+    else if(sym->st_shndx == SHN_COMMON)
+        ostr << "COMN";
+    else {
+        // Grab the section header for this section
+        Elf_Scn* scn = elf_getscn(m_pCurrElf, sym->st_shndx);
+        assert(scn && "Couldn't find section descriptor");
+        Elf64_Shdr* secHdr = elf64_getshdr(scn);
+        assert(secHdr && "Unable to obtain section header");
+
+        // Obtain, truncate, and output section name
+        char* secName = m_pSecNameTab + secHdr->sh_name;
+        char truncated[12];
+        strncpy(truncated, secName, sizeof(truncated) - 1);
+        truncated[sizeof(truncated) - 1] = '\0';
+        ostr << truncated;
+    }
+    
+    // Output symbol name
+    ostr << " " << (m_pStrTab + sym->st_name) << endl;
+}
+
+void ElfReader::nextFile()
+{
+    // Advance to the next file in the archive.
+    m_currCmd = elf_next(m_pCurrElf);
+
+    // Dispose of the old ELF descriptor and get a new one that corresponds to the new
+    // file in the archive.
+
+    elf_end(m_pCurrElf);
+    m_pCurrElf = elf_begin(m_fd, m_currCmd, m_pPrimaryElf);
+
+#if 0
+    if(Elf_Arhdr* arhdr = elf_getarhdr(m_pCurrElf))
+        cerr << "AR member name: " << arhdr->ar_name << endl;
+#endif
+
+    m_pElfHdr = 0;
+    m_pSymHdr = 0;
+    m_pSymSec = 0;
+    m_pStrTab = m_pSecNameTab = 0;
+    m_pCurrSym = m_pSymEnd = 0;
+
+    // NB: m_pCurrElf will be 0 here if we've reached the end of the archive
+}
+
+bool ElfReader::handleNewFile() 
+{
+    assert(!m_pElfHdr && "handleNewFile expects that no ELF header exists!");
 
     // Grab ptr to ELF header
-    m_elfHdr = elf64_getehdr(m_elfDes);
-    assert(m_elfHdr && "Failed to obtain valid ELF header");
+    m_pElfHdr = elf64_getehdr(m_pCurrElf);
+    if(!m_pElfHdr)
+        return false;
+
+    // Obtain pointer to the string table of section names
+    Elf_Data* strTabHand = elf_getdata(elf_getscn(m_pCurrElf, m_pElfHdr->e_shstrndx), 0);
+    assert(strTabHand && "Couldn't obtain ELF data handle to section-name string table");
+    m_pSecNameTab = (char*) strTabHand->d_buf;
 
-    LocateSections();
+    // Find symbol table(s), etc.
+    locateSections();
 
     // Obtain pointer to string table associated with the symbol table
-    Elf_Data* strTabHand = elf_getdata(elf_getscn(m_elfDes, m_symTab->sh_link), 0);
+    strTabHand = elf_getdata(elf_getscn(m_pCurrElf, m_pSymHdr->sh_link), 0);
     assert(strTabHand && "Couldn't obtain ELF data handle to symbol-name string table");
-    m_strTab = (char*) strTabHand->d_buf;
+    m_pStrTab = (char*) strTabHand->d_buf;
 
-    // Determine the size of each entry and the number of entries in the table
-    m_entrySize = m_symTab->sh_entsize;
-    m_numEntries = m_symTab->sh_size / m_entrySize;
-    assert(m_symTab->sh_size % m_entrySize == 0 && "Symtable size must be multiple of entry size");
-    
-    // Seek to the start of the symbol table in the file
-    if(lseek(m_execFD, m_symTab->sh_offset, SEEK_SET) < 0)
-        assert(0 && "Couldn't seek to start of symbol table");
-
-    m_entriesProcessed = 0;
-}
-
-bool ElfReader::GetNextFunction(std::string& fname,
-                                std::pair<uint64_t, uint64_t>& range,
-                                unsigned instWidth)
-{
-    // Locate next function (skipping non-function entries) in the symbol table if
-    // possible.  If found, return true, yielding name & extents by reference.  Return
-    // false otherwise.
-    //
-    // NB: Address range contains the (closed) memory interval [start,end] of the memory
-    // addresses corresponding to the function with function fname.
-    
-    while(m_entriesProcessed < m_numEntries) {
-        m_entriesProcessed++;
-
-        // Read one entry in the table
-        Elf64_Sym sym;
-        unsigned rdcnt = 0;
-        do {
-            rdcnt = read(m_execFD, &sym + rdcnt, m_entrySize);
-        } while(rdcnt < m_entrySize);
-
-        // If it is a function in the code segment, extract name, extents, and return.
-        if(STT_FUNC == (sym.st_info & 0xf)) { // Symbol type is lower 4 bits
-            if(sym.st_shndx == m_codeSectionIdx) {
-                fname = m_strTab + sym.st_name;
-                range.first = sym.st_value;
-                range.second = sym.st_value + sym.st_size - instWidth;
-                return true;
-            }
+    assert(m_pSymHdr->sh_entsize == sizeof(Elf64_Sym) &&
+           "Expect entry size to correspond to size of Elf64_Sym type");
+    
+    // Determine the number of entries in the table
+    Elf64_Xword numEntries = m_pSymHdr->sh_size / sizeof(Elf64_Sym);
+    assert(m_pSymHdr->sh_size % sizeof(Elf64_Sym) == 0 && "Symtable size must be multiple of entry size");
+    assert(m_pSymHdr->sh_size > 0 && "Empty symbol table encountered");
+
+    // Obtain the symbol table data
+    Elf_Data* data = elf_getdata(m_pSymSec, 0);
+    assert(data && "Could not obtain symtable data");
+    assert(data->d_size == m_pSymHdr->sh_size && "Size in data and size in header do not correspond");
+    assert(data->d_type == ELF_T_SYM && "Unexpected data type found in symtable");
+    assert(!elf_getdata(m_pSymSec, data) && "No more data expected in section");
+
+    // Set current entry pointer and pointer to end of table
+    m_pCurrSym = m_pSymEnd = (Elf64_Sym*) data->d_buf;
+    m_pSymEnd += numEntries;
+    
+    return true;
+}
+
+bool ElfReader::findNextSymbol(std::string& fname,
+                               std::pair<uint64_t, uint64_t>& range,
+                               unsigned instWidth)
+{
+    if(!m_pCurrElf) {
+        // No valid ELF descriptor? Nothing left to process.
+        return false;
+    }
+
+    if(!m_pElfHdr) {
+        // No ELF header? Must be about to process a new file in the archive. Perform the
+        // necessary setup that must occur before processing. If handling the current
+        // new file fails, try the next one.
+
+        if(!handleNewFile()) {
+            nextFile();
+            return findNextSymbol(fname, range, instWidth);
         }
     }
+    
+    assert(m_pCurrSym && m_pSymEnd && "Invalid symtable pointers");
 
-    return false;
-}
+    // Locate next function (skipping non-function entries) in the symbol table that is
+    // marked as being in the code segment. If found, return true, yielding name & extents
+    // by reference. If we are done scanning the symbol table, advance to the next file in
+    // the archive and continue searching recursively.
+
+    bool foundSym = false;
+    for( ; !foundSym && m_pCurrSym != m_pSymEnd; ++m_pCurrSym) {
+        unsigned info = m_pCurrSym->st_info & 0xf; // Symbol type is lower 4 bits
+        
+        if(STT_FUNC == info && m_pCurrSym->st_shndx == m_codeSecIdx) {
+            //printSymTableEntry(m_pCurrSym);
+
+            fname = m_pStrTab + m_pCurrSym->st_name;
+            
+            // NB: Address range contains the (closed) memory interval [start,end] of
+            // the memory addresses of function with symbol fname.
+            
+            range.first = m_pCurrSym->st_value;
+            range.second = m_pCurrSym->st_value + m_pCurrSym->st_size - instWidth;
+            foundSym = true;
+        }
+    }
 
-void ElfReader::LocateSections()
-{
-    // Obtain the base pointer to the string table containing the names of the
-    // sections in the file.
+    if(foundSym)
+        return true;
 
-    Elf_Data* strTabHand = elf_getdata(elf_getscn(m_elfDes, m_elfHdr->e_shstrndx), 0);
-    assert(strTabHand && "Couldn't obtain ELF data handle to section-name string table");
-    char* secNameTable = (char*) strTabHand->d_buf;
+    // We have processed all of the symbols in this file.
+    nextFile();
+    return findNextSymbol(fname, range, instWidth);
+}
 
+void ElfReader::locateSections()
+{
     // Examine the section header of each section, looking for:
     // a) The symbol table.  When found ensure that it is unique, otherwise, assert out.
     // b) The code segment (i.e. the segment which has the spec-defined name ".text"
 
     bool codeSegmentFound = false;
     Elf64_Shdr* secHdr;
-    for(Elf_Scn* currScn = 0; (currScn = elf_nextscn(m_elfDes, currScn)); ) {
+    for(Elf_Scn* currScn = 0; (currScn = elf_nextscn(m_pCurrElf, currScn)); ) {
         secHdr = elf64_getshdr(currScn);
         assert(secHdr && "Unable to obtain section header");
 
         if(SHT_SYMTAB == secHdr->sh_type) {
             // Found section marked as a symbol table
-            assert(!m_symTab && "Should only be one symbol table in the image");
-            m_symTab = secHdr;
+            assert(!m_pSymHdr && !m_pSymSec && "Should only be one symbol table in the image");
+            m_pSymHdr = secHdr;
+            m_pSymSec = currScn;
         }
         else if(SHT_PROGBITS == secHdr->sh_type) {
             if(!codeSegmentFound) {
-                // Found section marked as "program-defined".  Obtain section name and
-                // see if it matches the name of the code segment.
-                
-                char* sectionName = secNameTable + secHdr->sh_name;
+                // Found section marked as "program-defined".  Obtain section name and see
+                // if it matches the name of the code segment.
+                char* sectionName = m_pSecNameTab + secHdr->sh_name;
                 if(sm_codeSegmentName == sectionName) {
-                    m_codeSectionIdx = elf_ndxscn(currScn);
+                    m_codeSecIdx = elf_ndxscn(currScn);
                     codeSegmentFound = true;
                 }
             }
@@ -141,10 +316,8 @@
         // NB: May need to look for SHT_DYNSYM here later on.
     }
 
-    assert(m_symTab && "Couldn't locate symbol table (stripped executable?)");
-    assert(codeSegmentFound && "Couldn't locate code segment");
-}
+    assert(m_pSymHdr && m_pSymSec &&
+           "Couldn't locate symbol table (stripped executable?)");
 
-ElfReader::~ElfReader() 
-{
+    assert(codeSegmentFound && "Couldn't locate code segment");
 }


Index: llvm/lib/Reoptimizer/Inst/lib/Phases.cpp
diff -u llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.21 llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.22
--- llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.21	Wed Apr 30 17:42:36 2003
+++ llvm/lib/Reoptimizer/Inst/lib/Phases.cpp	Sun May  4 15:01:30 2003
@@ -176,7 +176,7 @@
     // functions here -- this could be quite large.
     
     vector<std::pair<std::string, AddressRange> > funcs;
-    while(elfReader.GetNextFunction(funcName, range, m_pIM->getInstWidth()))
+    while(elfReader.findNextSymbol(funcName, range, m_pIM->getInstWidth()))
         funcs.push_back(std::make_pair(funcName, range));
 
     cerr << "There are " << funcs.size() << " functions to process." << endl << endl;
@@ -185,10 +185,16 @@
     
     for(vector<std::pair<std::string, AddressRange> >::iterator i = funcs.begin(),
             e = funcs.end(); i != e; ++i) {
+
+#if 1
+        cerr << i->first << endl;
+        
+#else
         if(i->first == "fibs") {
             cerr << "Transforming function " << i->first << "..." << endl;
             transformFunction(i->second);
         }
+#endif
     }
 
     cerr << "============================== End Phase 2 ==============================\n";    





More information about the llvm-commits mailing list