[llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp Phases.cpp
Joel Stanley
jstanley at cs.uiuc.edu
Sun May 4 14:55:05 PDT 2003
Changes in directory llvm/lib/Reoptimizer/Inst/lib:
ElfReader.cpp updated: 1.8 -> 1.9
Phases.cpp updated: 1.21 -> 1.22
---
Log message:
Bugfixes to ELF reader, redesigned slightly to handle archive files properly.
---
Diffs of the changes:
Index: llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp
diff -u llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp:1.8 llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp:1.9
--- llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp:1.8 Wed Apr 30 17:42:36 2003
+++ llvm/lib/Reoptimizer/Inst/lib/ElfReader.cpp Sun May 4 15:01:30 2003
@@ -2,7 +2,9 @@
// programmer: Joel Stanley
// date: Fri Mar 21 16:11:07 CST 2003
// fileid: ElfReader.cpp
-// purpose: Implementation of ELF reader interface.
+// purpose: Implementation of ELF reader interface as described in ElfReader.h. Note
+// that special care is taken to ensure that archive files are treated the same as
+// single object files.
//
// TODO:
//
@@ -14,6 +16,7 @@
//
// []
+#include <ar.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -22,118 +25,290 @@
#include <sys/types.h>
#include <unistd.h>
#include <iostream>
+#include <iomanip>
#include "llvm/Reoptimizer/Inst/ElfReader.h"
using std::cerr;
using std::endl;
+// Symbol binding values that don't occur in the ELF spec, but that are recognized as WEAK
+// and GLOB by the elfdump command-line utility. For matching output.
+
+#define ELFDUMP_GLOB 0x10
+#define ELFDUMP_WEAK 0x20
+
const std::string ElfReader::sm_codeSegmentName = ".text";
ElfReader::ElfReader(const char* execName):
- m_execFD(-1),
- m_elfDes(0),
- m_elfHdr(0),
- m_symTab(0)
-{
- m_execFD = open(execName, O_RDONLY);
- assert(m_execFD && "Failed to open executable image");
-
- // Obtain the ELF descriptor
- elf_version(EV_CURRENT);
- m_elfDes = elf_begin(m_execFD, ELF_C_READ, 0);
- assert(m_elfDes && "Failed to obtain valid ELF descriptor");
+ m_fd(-1),
+ m_pPrimaryElf(0),
+ m_pCurrElf(0),
+ m_currCmd(ELF_C_READ),
+ m_pElfHdr(0),
+ m_pSymHdr(0),
+ m_pSymSec(0),
+ m_pStrTab(0),
+ m_pSecNameTab(0),
+ m_pCurrSym(0),
+ m_pSymEnd(0)
+{
+ m_fd = open(execName, O_RDONLY);
+ assert(m_fd != -1 && "Failed to open executable image");
+
+ if(elf_version(EV_CURRENT) == EV_NONE) {
+ cerr << "ELF library version mismatch" << endl;
+ exit(1);
+ }
+
+ // Obtain the primary ELF descriptor
+ m_pPrimaryElf = elf_begin(m_fd, m_currCmd, 0);
+ assert(m_pPrimaryElf && "Failed to obtain primary ELF descriptor");
+
+ // Advance to the first file in the archive (essentially a no-op if only one file)
+ m_pCurrElf = elf_begin(m_fd, m_currCmd, m_pPrimaryElf);
+
+#if 0
+ if(Elf_Arhdr* arhdr = elf_getarhdr(m_pCurrElf))
+ cerr << "AR member name: " << arhdr->ar_name << endl;
+#endif
+}
+
+ElfReader::~ElfReader()
+{
+ elf_end(m_pPrimaryElf);
+ close(m_fd);
+}
+
+void ElfReader::printSymTableEntry(Elf64_Sym* sym, std::ostream& ostr)
+{
+ // Print this symbol table entry. Output format is similar to that of the elfdump
+ // command line utility.
+
+ ostr << std::setw(16) << std::hex << std::setfill('0') << sym->st_value << " "
+ << std::setw(16) << std::hex << std::setfill('0') << sym->st_size;
+
+ int type = sym->st_info & 0xf;
+ ostr << " " << std::setw(4);
+
+ switch(type) {
+ case STT_NOTYPE: ostr << "NOTY"; break;
+ case STT_OBJECT: ostr << "OBJT"; break;
+ case STT_FUNC: ostr << "FUNC"; break;
+ case STT_SECTION: ostr << "SECT"; break;
+
+ case STT_FILE:
+ case STT_COMMON:
+ case STT_TLS:
+ case STT_NUM:
+ case STT_LOPROC:
+ case STT_HIPROC:
+ ostr << "OTHR"; break;
+
+ default:
+ //ostr << "(" << type << ")" << endl;
+ assert(0 && "Symbol table entry has unknown type");
+ break;
+ }
+
+ int binding = sym->st_info & 0xf0;
+ ostr << " " << std::setw(4);
+
+ switch(binding) {
+ case STB_LOCAL: ostr << "LOCL"; break;
+
+ case ELFDUMP_GLOB:
+ case STB_GLOBAL: ostr << "GLOB"; break;
+
+ case ELFDUMP_WEAK:
+ case STB_WEAK: ostr << "WEAK"; break;
+
+ case STB_NUM: ostr << "NUMB"; break;
+
+ case STB_LOPROC:
+ case STB_HIPROC:
+ ostr << "OTHR"; break;
+
+ default:
+ //ostr << "(" << binding << ")" << endl;
+ assert(0 && "Unknown binding type");
+ break;
+ }
+
+ ostr << " " << std::setw(11) << std::setfill(' ');
+
+ if(sym->st_shndx == SHN_UNDEF)
+ ostr << "UNDEF";
+ else if(sym->st_shndx == SHN_ABS)
+ ostr << "ABS";
+ else if(sym->st_shndx == SHN_COMMON)
+ ostr << "COMN";
+ else {
+ // Grab the section header for this section
+ Elf_Scn* scn = elf_getscn(m_pCurrElf, sym->st_shndx);
+ assert(scn && "Couldn't find section descriptor");
+ Elf64_Shdr* secHdr = elf64_getshdr(scn);
+ assert(secHdr && "Unable to obtain section header");
+
+ // Obtain, truncate, and output section name
+ char* secName = m_pSecNameTab + secHdr->sh_name;
+ char truncated[12];
+ strncpy(truncated, secName, sizeof(truncated) - 1);
+ truncated[sizeof(truncated) - 1] = '\0';
+ ostr << truncated;
+ }
+
+ // Output symbol name
+ ostr << " " << (m_pStrTab + sym->st_name) << endl;
+}
+
+void ElfReader::nextFile()
+{
+ // Advance to the next file in the archive.
+ m_currCmd = elf_next(m_pCurrElf);
+
+ // Dispose of the old ELF descriptor and get a new one that corresponds to the new
+ // file in the archive.
+
+ elf_end(m_pCurrElf);
+ m_pCurrElf = elf_begin(m_fd, m_currCmd, m_pPrimaryElf);
+
+#if 0
+ if(Elf_Arhdr* arhdr = elf_getarhdr(m_pCurrElf))
+ cerr << "AR member name: " << arhdr->ar_name << endl;
+#endif
+
+ m_pElfHdr = 0;
+ m_pSymHdr = 0;
+ m_pSymSec = 0;
+ m_pStrTab = m_pSecNameTab = 0;
+ m_pCurrSym = m_pSymEnd = 0;
+
+ // NB: m_pCurrElf will be 0 here if we've reached the end of the archive
+}
+
+bool ElfReader::handleNewFile()
+{
+ assert(!m_pElfHdr && "handleNewFile expects that no ELF header exists!");
// Grab ptr to ELF header
- m_elfHdr = elf64_getehdr(m_elfDes);
- assert(m_elfHdr && "Failed to obtain valid ELF header");
+ m_pElfHdr = elf64_getehdr(m_pCurrElf);
+ if(!m_pElfHdr)
+ return false;
+
+ // Obtain pointer to the string table of section names
+ Elf_Data* strTabHand = elf_getdata(elf_getscn(m_pCurrElf, m_pElfHdr->e_shstrndx), 0);
+ assert(strTabHand && "Couldn't obtain ELF data handle to section-name string table");
+ m_pSecNameTab = (char*) strTabHand->d_buf;
- LocateSections();
+ // Find symbol table(s), etc.
+ locateSections();
// Obtain pointer to string table associated with the symbol table
- Elf_Data* strTabHand = elf_getdata(elf_getscn(m_elfDes, m_symTab->sh_link), 0);
+ strTabHand = elf_getdata(elf_getscn(m_pCurrElf, m_pSymHdr->sh_link), 0);
assert(strTabHand && "Couldn't obtain ELF data handle to symbol-name string table");
- m_strTab = (char*) strTabHand->d_buf;
+ m_pStrTab = (char*) strTabHand->d_buf;
- // Determine the size of each entry and the number of entries in the table
- m_entrySize = m_symTab->sh_entsize;
- m_numEntries = m_symTab->sh_size / m_entrySize;
- assert(m_symTab->sh_size % m_entrySize == 0 && "Symtable size must be multiple of entry size");
-
- // Seek to the start of the symbol table in the file
- if(lseek(m_execFD, m_symTab->sh_offset, SEEK_SET) < 0)
- assert(0 && "Couldn't seek to start of symbol table");
-
- m_entriesProcessed = 0;
-}
-
-bool ElfReader::GetNextFunction(std::string& fname,
- std::pair<uint64_t, uint64_t>& range,
- unsigned instWidth)
-{
- // Locate next function (skipping non-function entries) in the symbol table if
- // possible. If found, return true, yielding name & extents by reference. Return
- // false otherwise.
- //
- // NB: Address range contains the (closed) memory interval [start,end] of the memory
- // addresses corresponding to the function with function fname.
-
- while(m_entriesProcessed < m_numEntries) {
- m_entriesProcessed++;
-
- // Read one entry in the table
- Elf64_Sym sym;
- unsigned rdcnt = 0;
- do {
- rdcnt = read(m_execFD, &sym + rdcnt, m_entrySize);
- } while(rdcnt < m_entrySize);
-
- // If it is a function in the code segment, extract name, extents, and return.
- if(STT_FUNC == (sym.st_info & 0xf)) { // Symbol type is lower 4 bits
- if(sym.st_shndx == m_codeSectionIdx) {
- fname = m_strTab + sym.st_name;
- range.first = sym.st_value;
- range.second = sym.st_value + sym.st_size - instWidth;
- return true;
- }
+ assert(m_pSymHdr->sh_entsize == sizeof(Elf64_Sym) &&
+ "Expect entry size to correspond to size of Elf64_Sym type");
+
+ // Determine the number of entries in the table
+ Elf64_Xword numEntries = m_pSymHdr->sh_size / sizeof(Elf64_Sym);
+ assert(m_pSymHdr->sh_size % sizeof(Elf64_Sym) == 0 && "Symtable size must be multiple of entry size");
+ assert(m_pSymHdr->sh_size > 0 && "Empty symbol table encountered");
+
+ // Obtain the symbol table data
+ Elf_Data* data = elf_getdata(m_pSymSec, 0);
+ assert(data && "Could not obtain symtable data");
+ assert(data->d_size == m_pSymHdr->sh_size && "Size in data and size in header do not correspond");
+ assert(data->d_type == ELF_T_SYM && "Unexpected data type found in symtable");
+ assert(!elf_getdata(m_pSymSec, data) && "No more data expected in section");
+
+ // Set current entry pointer and pointer to end of table
+ m_pCurrSym = m_pSymEnd = (Elf64_Sym*) data->d_buf;
+ m_pSymEnd += numEntries;
+
+ return true;
+}
+
+bool ElfReader::findNextSymbol(std::string& fname,
+ std::pair<uint64_t, uint64_t>& range,
+ unsigned instWidth)
+{
+ if(!m_pCurrElf) {
+ // No valid ELF descriptor? Nothing left to process.
+ return false;
+ }
+
+ if(!m_pElfHdr) {
+ // No ELF header? Must be about to process a new file in the archive. Perform the
+ // necessary setup that must occur before processing. If handling the current
+ // new file fails, try the next one.
+
+ if(!handleNewFile()) {
+ nextFile();
+ return findNextSymbol(fname, range, instWidth);
}
}
+
+ assert(m_pCurrSym && m_pSymEnd && "Invalid symtable pointers");
- return false;
-}
+ // Locate next function (skipping non-function entries) in the symbol table that is
+ // marked as being in the code segment. If found, return true, yielding name & extents
+ // by reference. If we are done scanning the symbol table, advance to the next file in
+ // the archive and continue searching recursively.
+
+ bool foundSym = false;
+ for( ; !foundSym && m_pCurrSym != m_pSymEnd; ++m_pCurrSym) {
+ unsigned info = m_pCurrSym->st_info & 0xf; // Symbol type is lower 4 bits
+
+ if(STT_FUNC == info && m_pCurrSym->st_shndx == m_codeSecIdx) {
+ //printSymTableEntry(m_pCurrSym);
+
+ fname = m_pStrTab + m_pCurrSym->st_name;
+
+ // NB: Address range contains the (closed) memory interval [start,end] of
+ // the memory addresses of function with symbol fname.
+
+ range.first = m_pCurrSym->st_value;
+ range.second = m_pCurrSym->st_value + m_pCurrSym->st_size - instWidth;
+ foundSym = true;
+ }
+ }
-void ElfReader::LocateSections()
-{
- // Obtain the base pointer to the string table containing the names of the
- // sections in the file.
+ if(foundSym)
+ return true;
- Elf_Data* strTabHand = elf_getdata(elf_getscn(m_elfDes, m_elfHdr->e_shstrndx), 0);
- assert(strTabHand && "Couldn't obtain ELF data handle to section-name string table");
- char* secNameTable = (char*) strTabHand->d_buf;
+ // We have processed all of the symbols in this file.
+ nextFile();
+ return findNextSymbol(fname, range, instWidth);
+}
+void ElfReader::locateSections()
+{
// Examine the section header of each section, looking for:
// a) The symbol table. When found ensure that it is unique, otherwise, assert out.
// b) The code segment (i.e. the segment which has the spec-defined name ".text"
bool codeSegmentFound = false;
Elf64_Shdr* secHdr;
- for(Elf_Scn* currScn = 0; (currScn = elf_nextscn(m_elfDes, currScn)); ) {
+ for(Elf_Scn* currScn = 0; (currScn = elf_nextscn(m_pCurrElf, currScn)); ) {
secHdr = elf64_getshdr(currScn);
assert(secHdr && "Unable to obtain section header");
if(SHT_SYMTAB == secHdr->sh_type) {
// Found section marked as a symbol table
- assert(!m_symTab && "Should only be one symbol table in the image");
- m_symTab = secHdr;
+ assert(!m_pSymHdr && !m_pSymSec && "Should only be one symbol table in the image");
+ m_pSymHdr = secHdr;
+ m_pSymSec = currScn;
}
else if(SHT_PROGBITS == secHdr->sh_type) {
if(!codeSegmentFound) {
- // Found section marked as "program-defined". Obtain section name and
- // see if it matches the name of the code segment.
-
- char* sectionName = secNameTable + secHdr->sh_name;
+ // Found section marked as "program-defined". Obtain section name and see
+ // if it matches the name of the code segment.
+ char* sectionName = m_pSecNameTab + secHdr->sh_name;
if(sm_codeSegmentName == sectionName) {
- m_codeSectionIdx = elf_ndxscn(currScn);
+ m_codeSecIdx = elf_ndxscn(currScn);
codeSegmentFound = true;
}
}
@@ -141,10 +316,8 @@
// NB: May need to look for SHT_DYNSYM here later on.
}
- assert(m_symTab && "Couldn't locate symbol table (stripped executable?)");
- assert(codeSegmentFound && "Couldn't locate code segment");
-}
+ assert(m_pSymHdr && m_pSymSec &&
+ "Couldn't locate symbol table (stripped executable?)");
-ElfReader::~ElfReader()
-{
+ assert(codeSegmentFound && "Couldn't locate code segment");
}
Index: llvm/lib/Reoptimizer/Inst/lib/Phases.cpp
diff -u llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.21 llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.22
--- llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.21 Wed Apr 30 17:42:36 2003
+++ llvm/lib/Reoptimizer/Inst/lib/Phases.cpp Sun May 4 15:01:30 2003
@@ -176,7 +176,7 @@
// functions here -- this could be quite large.
vector<std::pair<std::string, AddressRange> > funcs;
- while(elfReader.GetNextFunction(funcName, range, m_pIM->getInstWidth()))
+ while(elfReader.findNextSymbol(funcName, range, m_pIM->getInstWidth()))
funcs.push_back(std::make_pair(funcName, range));
cerr << "There are " << funcs.size() << " functions to process." << endl << endl;
@@ -185,10 +185,16 @@
for(vector<std::pair<std::string, AddressRange> >::iterator i = funcs.begin(),
e = funcs.end(); i != e; ++i) {
+
+#if 1
+ cerr << i->first << endl;
+
+#else
if(i->first == "fibs") {
cerr << "Transforming function " << i->first << "..." << endl;
transformFunction(i->second);
}
+#endif
}
cerr << "============================== End Phase 2 ==============================\n";
More information about the llvm-commits mailing list