[llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/ElfReader.cpp ElfReader.h PerfInst.cpp design.txt
Joel Stanley
jstanley at cs.uiuc.edu
Thu Apr 3 15:01:01 PST 2003
Changes in directory llvm/lib/Reoptimizer/Inst:
ElfReader.cpp updated: 1.1 -> 1.2
ElfReader.h updated: 1.1 -> 1.2
PerfInst.cpp updated: 1.3 -> 1.4
design.txt updated: 1.4 -> 1.5
---
Log message:
---
Diffs of the changes:
Index: llvm/lib/Reoptimizer/Inst/ElfReader.cpp
diff -u llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.1 llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.2
--- llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.1 Mon Mar 31 11:48:07 2003
+++ llvm/lib/Reoptimizer/Inst/ElfReader.cpp Thu Apr 3 15:00:51 2003
@@ -83,66 +83,12 @@
if(STT_FUNC == (sym.st_info & 0xf)) { // Symbol type is lower 4 bits
fname = m_strTab + sym.st_name;
addressRange.first = sym.st_value;
- addressRange.second = sym.st_value + sym.st_size;
+ addressRange.second = sym.st_value + sym.st_size - 4;
return true;
}
}
return false;
-}
-
-void ElfReader::DumpFunctions(std::ostream& ostr)
-{
- if(!m_symTab)
- LocateSymbolTable();
-
- // tmp
- ostr << "Dumping functions from linker symbol table" << endl;
- Elf_Data* data = elf_getdata(elf_getscn(m_elfDes, m_elfHdr->e_shstrndx), 0);
- char* secName = (char*) data->d_buf + m_symTab->sh_name;
- ostr << "Section name of symtable is: " << secName << endl;
- // tmp
-
- // Obtain ptr to string table associated with the symbol table.
- Elf_Data* strTabHand = elf_getdata(elf_getscn(m_elfDes, m_symTab->sh_link), 0);
- assert(strTabHand && "Couldn't obtain ELF data handle to string table");
- char* strTab = (char*) strTabHand->d_buf;
-
- // Determine the size of each entry and the number of entries in the symbol table
- int entrySize = m_symTab->sh_entsize;
- int numEntries = m_symTab->sh_size / entrySize;
- assert(m_symTab->sh_size % entrySize == 0 && "Symtable size must be multiple of entry size");
- ostr << "Symbol table contains " << numEntries << " entries" << endl;
-
- // Seek to the start of the symbol table in the file
- if(lseek(m_execFD, m_symTab->sh_offset, SEEK_SET) < 0)
- assert(0 && "Couldn't seek to start of symbol table");
-
- // Scan for entries of type STT_FUNC (which denote symtable table entries that
- // correspond to function entry points), and dump information about each one of them.
-
- Elf64_Sym sym;
- int numFunc = 0;
- for(int currEnt = 0; currEnt < numEntries; ++currEnt) {
- int rdcnt = 0;
- do {
- rdcnt = read(m_execFD, &sym + rdcnt, entrySize);
- } while(rdcnt < entrySize);
-
- if(STT_FUNC == (sym.st_info & 0xf)) { // Symbol type is lower 4 bits
- numFunc++;
-
- //ostr << (strTab + sym.st_name) << "[";
- //fprintf(stderr, "Address is %lx", sym.st_value);
- //fflush(stderr);
- //ostr << "], size " << sym.st_size << endl;
-
- ostr << "Function name is: " << (strTab + sym.st_name) << endl;
-
- }
- }
-
- //ostr << "Done, encountered " << numFunc << " functions" << endl;
}
void ElfReader::LocateSymbolTable()
Index: llvm/lib/Reoptimizer/Inst/ElfReader.h
diff -u llvm/lib/Reoptimizer/Inst/ElfReader.h:1.1 llvm/lib/Reoptimizer/Inst/ElfReader.h:1.2
--- llvm/lib/Reoptimizer/Inst/ElfReader.h:1.1 Mon Mar 31 11:48:34 2003
+++ llvm/lib/Reoptimizer/Inst/ElfReader.h Thu Apr 3 15:00:51 2003
@@ -19,7 +19,6 @@
typedef std::pair<uint64_t, uint64_t> AddressRange;
bool GetNextFunction(std::string& string, AddressRange& range);
- void DumpFunctions(std::ostream& ostr);
private:
ElfReader() {}
Index: llvm/lib/Reoptimizer/Inst/PerfInst.cpp
diff -u llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.3 llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.4
--- llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.3 Mon Mar 31 11:48:34 2003
+++ llvm/lib/Reoptimizer/Inst/PerfInst.cpp Thu Apr 3 15:00:51 2003
@@ -16,6 +16,7 @@
#include <vector>
#include "ElfReader.h"
+#include "../BinInterface/sparcdis.h"
using std::vector;
using std::cerr;
@@ -33,7 +34,32 @@
std::string funcName;
ElfReader::AddressRange range;
while(elfReader.GetNextFunction(funcName, range)) {
- cerr << "Function name is: " << funcName << endl;
+ if(funcName == "main") {
+ cerr << "Function name is: " << funcName << endl;
+ cerr << "\tAddress range is [";
+ fprintf(stderr, "%lx, %lx]", range.first, range.second);
+ cerr << endl;
+
+ cerr << "Dumping BinInterface-generated disasm:" << endl;
+
+ for(unsigned* inst = (unsigned*)((void*) range.first),
+ *end = (unsigned*)((void*) range.second); inst <= end; ++inst){
+ printf("%lx:\t%8x\t", (uint64_t) inst, *inst);
+ sparc_print(*inst);
+ printf("\n");
+ fflush(stdout);
+ }
+
+#if 0
+ cerr << "First instruction in function: " << endl;
+ void* ptr = (void*) range.first;
+ unsigned inst = *((uint32_t*)((void*) range.first));
+ fprintf(stderr, "%x\n", inst);
+ cerr << "Disassembly is: ";
+ sparc_print(inst);
+ fflush(stdout);
+#endif
+ }
}
cerr << "============================== End Phase 2 ==============================\n";
Index: llvm/lib/Reoptimizer/Inst/design.txt
diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.4 llvm/lib/Reoptimizer/Inst/design.txt:1.5
--- llvm/lib/Reoptimizer/Inst/design.txt:1.4 Mon Mar 31 11:48:34 2003
+++ llvm/lib/Reoptimizer/Inst/design.txt Thu Apr 3 15:00:51 2003
@@ -653,6 +653,79 @@
}}}
+{{{ MEETING MINUTES 03 Apr 2003
+
+New definition of different phases:
+
+Phase 1:
+
+Same as before but inserts ONE call to phase 2 in main.
+
+Phase 2:
+
+Using the ELF symbol table, iterate over *every* function (can we restrict
+ourselves to only the code in the text segment? I sure hope so) and attempt to
+locate its pad. If the pad is not found, the function has not been instrumented
+and we don't care about it.
+
+For functions where only one pad is encountered (common case), the format is:
+
+ entry instruction
+ ...
+ paddedRegion start
+ ...
+ paddedRegion end
+ return code
+
+The new code looks like:
+
+branch to padded region start + return code size
+...
+return code (copied from end of function; this is at the padded region start location)
+entry instruction (target of inserted branch)
+[padded region contents]
+return code
+
+Must decide how to handle functions where two (or more) pads are encountered. I
+have a sinking feeling that we will have to use both pads because we cannot
+dynamically grow the pad regions and there is cause for their total combined
+capacity to be there. We may have to devise some special action on the part of
+phase 2 to "distribute" the branches to phase 3 across the multiple pads.
+
+Phase 3:
+
+Isn't really a "phase"; rather, it will execute the code written by phase 2.
+However, this may result in transformation of the code because if we have a
+load-volatile candidate that is a false positive, the original load instruction
+will be restored in place of the branch into the base tramp.
+
+----
+
+Discussed a "signature" for load-volatiles. In C, the instrumentation locations
+will be denoted with:
+
+volatile char gvVar1;
+volatile short gvVar2;
+gvVar1 = gvVar2;
+
+The reason for the size difference is so that we get a load half-word, store
+byte instruction pair which can be searched for by phase2:
+
+ldh [%o0], %rn
+...
+stb %rn, [%o5]
+
+or whatever. These will be automatically selected as candidates for
+branch-replacement, which means that phase 2 will:
+
+a) overwrite the load with the branch down into the pad
+b) nop over the store
+c) save both instructions for restoring later if false positive (how? in pad?)
+
+First order of business is to actually FIND the pad extents.
+
+}}}
+
{{{ IMPLEMENTATION SKETCH
{{{ Current implementation sketch:
@@ -794,36 +867,11 @@
- Read EEL paper to get a better feel for binary modification issues
+{{{ OLD PHASE DESCRIPTION
+
- Use the existing mechanisms at your disposal
(ELF/tracecache/BinInterface/VirtualMem/etc) to do the following.
- In phase 1:
-
- Complete the remainder of the phase-1 actions: building the GBT, handling
- the sigfuns properly (i.e. adding a pair-of-sigfuns mechanism even for
- point metrics), compare against by-hand example for phase 1 actions, etc.
-
- At the end of each instrumented function, immutably pad with a large
- enough pad region. {Propose doing this as a for loop containing immutable
- loads}
-
- On program startup ("phase 2" function called from main()):
-
- [check] mmap or otherwise load the ELF representation of the program and
- acquire an ELF descriptor (etc) that will be persistent throughout the
- program's execution.
-
- Collect address ranges for all functions, so that when a particular
- load-volatile instruction is encountered, it can be determined what
- function it ended up being in. I think that these should be the same
- virtual addresses as seen within the context of the executing code, but
- this should be verified.
-
- ^^^ At this point, the application should be running and, at RUNTIME, spit
- out (at the very least) the function boundary addresses; preferably, it
- can spit out the BinInterface-obtained disassembly as well so that we can
- compare it against the static disassembly.
-
For each function, locate the load-volatile instructions that define
interval and point metrics (potentially recording some information about
them for later use); also find the padding region at the end of the
@@ -847,6 +895,19 @@
trampoline executes the first instruction and then calls the Phase 3
routine to instrument the function.]
+ Scratch that. I think this needs to be rephrased again to (assuming we
+ have only one pad region in the function body):
+
+ For each function, find the load-volatile instructions that define the
+ padded region so we know where it is. Then, replace the first instruction
+ in the function w/ a branch down to the padded region. The padded region
+ contains an indirect branch to a dynamically-allocated body of code into
+ which the entire function body is copied. Phase 2 then manipulates the
+ code in the copied region, replacing candidate load-volatiles w/ if/else
+ blocks that call the appropriate instrumentation function if the
+ load-volatile is actually an instrumentation function or executing the
+ original code otherwise.
+
On phase 3 transformation function invocation:
Performs all of tracecache-like magic, copying the original code to a
@@ -856,6 +917,47 @@
accomplish this step, we must first determine how to make the branch- and
call-maps that the TraceCache addTrace() routine(s) require, and how to
otherwise use the existing tracecache stuff to accomplish what we want.
+
+}}}
+
+{{{ NEW PHASE DESCRIPTION
+
+Notes on using the total-copy approach in the prototype implementation.
+
+Note that we will need to use the total-copy approach as a "fall-back" from the
+dummy function (or padded region approach) in the following cases:
+
+a) The dummy function is outside the PC-relative distance
+
+b) The number of candidate instructions exceeds the fixed-size of dummy function
+(or of the padding region). This may happen fairly easily, it seems to me,
+esp. since we can't really estimate the frequency of our "load half-word, store
+byte pattern".
+
+Since the padding region has problems (sorta complex "determining the load
+instructions" heuristic, but worse, what happens with inlining?), we'd decided
+to go with the dummy function approach.
+
+However, the copy solution will still have to be used in a robust implementation
+as a fall-back, so I intend to implement that first. Using the "copy-always"
+approach increases the initial overhead (which we don't *really* care about, but
+is important) and increases the memory footprint (but not the working-set
+size). These seem like inefficiencies that aren't so egregious that they ought
+not exist in the prototype version.
+
+Before I do the heap-managed copy-always approach, however, I should look into
+the trace cache capabilities and see if there is anything there that will
+significantly help me out. It's sufficient to do a dummy-function-only prototype
+as well (for similar reasons), but it's not as flexible.
+
+The tradeoff is a less general, more efficient implementation vs. a more
+general, less efficient implementation. The most general, most efficient
+implementation may not be obtainable in the short term, but it's reasonable to
+try for.
+
+
+}}}
+
}}}
{{{ COMPLETED TODO ITEMS
More information about the llvm-commits
mailing list