[llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/ElfReader.cpp ElfReader.h PerfInst.cpp design.txt

Thu Apr 3 15:01:01 PST 2003

Changes in directory llvm/lib/Reoptimizer/Inst:

ElfReader.cpp updated: 1.1 -> 1.2
ElfReader.h updated: 1.1 -> 1.2
PerfInst.cpp updated: 1.3 -> 1.4
design.txt updated: 1.4 -> 1.5

---
Log message:




---
Diffs of the changes:

Index: llvm/lib/Reoptimizer/Inst/ElfReader.cpp
diff -u llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.1 llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.2

--- llvm/lib/Reoptimizer/Inst/ElfReader.cpp:1.1	Mon Mar 31 11:48:07 2003
+++ llvm/lib/Reoptimizer/Inst/ElfReader.cpp	Thu Apr  3 15:00:51 2003
@@ -83,66 +83,12 @@
         if(STT_FUNC == (sym.st_info & 0xf)) { // Symbol type is lower 4 bits
             fname = m_strTab + sym.st_name;
             addressRange.first = sym.st_value;
-            addressRange.second = sym.st_value + sym.st_size;
+            addressRange.second = sym.st_value + sym.st_size - 4;
             return true;
         }
     }
 
     return false;
-}
-
-void ElfReader::DumpFunctions(std::ostream& ostr) 
-{
-    if(!m_symTab)
-        LocateSymbolTable();
-
-    // tmp
-    ostr << "Dumping functions from linker symbol table" << endl;
-    Elf_Data* data = elf_getdata(elf_getscn(m_elfDes, m_elfHdr->e_shstrndx), 0);
-    char* secName = (char*) data->d_buf + m_symTab->sh_name;
-    ostr << "Section name of symtable is: " << secName << endl;
-    // tmp
-
-    // Obtain ptr to string table associated with the symbol table.
-    Elf_Data* strTabHand = elf_getdata(elf_getscn(m_elfDes, m_symTab->sh_link), 0);
-    assert(strTabHand && "Couldn't obtain ELF data handle to string table");
-    char* strTab = (char*) strTabHand->d_buf;
-
-    // Determine the size of each entry and the number of entries in the symbol table
-    int entrySize = m_symTab->sh_entsize;
-    int numEntries = m_symTab->sh_size / entrySize;
-    assert(m_symTab->sh_size % entrySize == 0 && "Symtable size must be multiple of entry size");
-    ostr << "Symbol table contains " << numEntries << " entries" << endl;
-
-    // Seek to the start of the symbol table in the file
-    if(lseek(m_execFD, m_symTab->sh_offset, SEEK_SET) < 0)
-        assert(0 && "Couldn't seek to start of symbol table");
-
-    // Scan for entries of type STT_FUNC (which denote symtable table entries that
-    // correspond to function entry points), and dump information about each one of them.
-
-    Elf64_Sym sym;
-    int numFunc = 0;
-    for(int currEnt = 0; currEnt < numEntries; ++currEnt) {
-        int rdcnt = 0;
-        do {
-            rdcnt = read(m_execFD, &sym + rdcnt, entrySize);
-        } while(rdcnt < entrySize);
-        
-        if(STT_FUNC == (sym.st_info & 0xf)) { // Symbol type is lower 4 bits
-            numFunc++;
-
-            //ostr << (strTab + sym.st_name) << "[";
-            //fprintf(stderr, "Address is %lx", sym.st_value);
-            //fflush(stderr);
-            //ostr << "], size " << sym.st_size << endl;
-
-            ostr << "Function name is: " << (strTab + sym.st_name) << endl;
-            
-        }
-    }
-
-    //ostr << "Done, encountered " << numFunc << " functions" << endl;
 }
 
 void ElfReader::LocateSymbolTable()


Index: llvm/lib/Reoptimizer/Inst/ElfReader.h
diff -u llvm/lib/Reoptimizer/Inst/ElfReader.h:1.1 llvm/lib/Reoptimizer/Inst/ElfReader.h:1.2
--- llvm/lib/Reoptimizer/Inst/ElfReader.h:1.1	Mon Mar 31 11:48:34 2003
+++ llvm/lib/Reoptimizer/Inst/ElfReader.h	Thu Apr  3 15:00:51 2003
@@ -19,7 +19,6 @@
     typedef std::pair<uint64_t, uint64_t> AddressRange;
 
     bool GetNextFunction(std::string& string, AddressRange& range);
-    void DumpFunctions(std::ostream& ostr);
     
   private:
     ElfReader() {}


Index: llvm/lib/Reoptimizer/Inst/PerfInst.cpp
diff -u llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.3 llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.4
--- llvm/lib/Reoptimizer/Inst/PerfInst.cpp:1.3	Mon Mar 31 11:48:34 2003
+++ llvm/lib/Reoptimizer/Inst/PerfInst.cpp	Thu Apr  3 15:00:51 2003
@@ -16,6 +16,7 @@
 #include <vector>
 
 #include "ElfReader.h"
+#include "../BinInterface/sparcdis.h"
 
 using std::vector;
 using std::cerr;
@@ -33,7 +34,32 @@
     std::string funcName;
     ElfReader::AddressRange range;
     while(elfReader.GetNextFunction(funcName, range)) {
-        cerr << "Function name is: " << funcName << endl;
+        if(funcName == "main") {
+            cerr << "Function name is: " << funcName << endl;
+            cerr << "\tAddress range is ["; 
+            fprintf(stderr, "%lx, %lx]", range.first, range.second);
+            cerr << endl;
+
+            cerr << "Dumping BinInterface-generated disasm:" << endl;
+
+            for(unsigned* inst = (unsigned*)((void*) range.first),
+                    *end = (unsigned*)((void*) range.second); inst <= end; ++inst){
+                printf("%lx:\t%8x\t", (uint64_t) inst, *inst);
+                sparc_print(*inst);
+                printf("\n");
+                fflush(stdout);
+            }
+            
+#if 0
+            cerr << "First instruction in function: " << endl;
+            void* ptr = (void*) range.first;
+            unsigned inst = *((uint32_t*)((void*) range.first));
+            fprintf(stderr, "%x\n", inst);
+            cerr << "Disassembly is: ";
+            sparc_print(inst);
+            fflush(stdout);
+#endif
+        }
     }
     
     cerr << "============================== End Phase 2 ==============================\n";    


Index: llvm/lib/Reoptimizer/Inst/design.txt
diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.4 llvm/lib/Reoptimizer/Inst/design.txt:1.5
--- llvm/lib/Reoptimizer/Inst/design.txt:1.4	Mon Mar 31 11:48:34 2003
+++ llvm/lib/Reoptimizer/Inst/design.txt	Thu Apr  3 15:00:51 2003
@@ -653,6 +653,79 @@
 
 }}}
 
+{{{ MEETING MINUTES 03 Apr 2003
+
+New definition of different phases:
+
+Phase 1: 
+
+Same as before but inserts ONE call to phase 2 in main.
+
+Phase 2:
+
+Using the ELF symbol table, iterate over *every* function (can we restrict
+ourselves to only the code in the text segment? I sure hope so) and attempt to
+locate its pad.  If the pad is not found, the function has not been instrumented
+and we don't care about it.  
+
+For functions where only one pad is encountered (common case), the format is:
+
+   entry instruction
+   ...
+   paddedRegion start
+   ...
+   paddedRegion end
+   return code
+
+The new code looks like:
+
+branch to padded region start + return code size
+...
+return code (copied from end of function; this is at the padded region start location)
+entry instruction (target of inserted branch)
+[padded region contents]
+return code
+
+Must decide how to handle functions where two (or more) pads are encountered.  I
+have a sinking feeling that we will have to use both pads because we cannot
+dynamically grow the pad regions and there is cause for their total combined
+capacity to be there. We may have to devise some special action on the part of
+phase 2 to "distribute" the branches to phase 3 across the multiple pads.
+
+Phase 3:
+
+Isn't really a "phase"; rather, it executes the code written by phase 2.
+However, this may result in transformation of the code because if we have a
+load-volatile candidate that is a false positive, the original load instruction
+will be restored in place of the branch into the base tramp. 
+
+----
+
+Discussed a "signature" for load-volatiles. In C, the instrumentation locations
+will be denoted with:
+
+volatile char gvVar1;
+volatile short gvVar2;
+gvVar1 = gvVar2;
+
+The reason for the size difference is so that we get a load half-word, store
+byte instruction pair which can be searched for by phase2:
+
+ldh [%o0], %rn
+...
+stb rn, [%o5]
+
+or whatever. These will be automatically selected as candidates for
+branch-replacement, which means that phase 2 will:
+
+a) overwrite the load with the branch down into the pad
+b) nop over the store 
+c) save both instructions for restoring later if false positive (how? in pad?)
+
+First order of business is to actually FIND the pad extents.
+
+}}}
+
 {{{ IMPLEMENTATION SKETCH
 
     {{{ Current implementation sketch:
@@ -794,36 +867,11 @@
 
 - Read EEL paper to get a better feel for binary modification issues
 
+{{{ OLD PHASE DESCRIPTION 
+
 - Use the existing mechanisms at your disposal
   (ELF/tracecache/BinInterface/VirtualMem/etc) to do the following.
 
-  In phase 1:
-
-      Complete the remainder of the phase-1 actions: building the GBT, handling
-      the sigfuns properly (i.e. adding a pair-of-sigfuns mechanism even for
-      point metrics), compare against by-hand example for phase 1 actions, etc.
-
-      At the end of each instrumented function, immutably pad with a large
-      enough pad region. {Propose doing this as a for loop containing immutable
-      loads}
- 
-  On program startup ("phase 2" function called from main()):
-
-      [check] mmap or otherwise load the ELF representation of the program and
-      acquire an ELF descriptor (etc) that will be persistent throughout the
-      program's execution.
-
-      Collect address ranges for all functions, so that when a particular
-      load-volatile instruction is encountered, it can be determined what
-      function it ended up being in.  I think that these should be the same
-      virtual addresses as seen within the context of the executing code, but
-      this should be verified.
-
-      ^^^ At this point, the application should be running and, at RUNTIME, spit
-      out (at the very least) the function boundary addresses; preferably, it
-      can spit out the BinInterface-obtained disassembly as well so that we can
-      compare it against the static disassembly.
-
       For each function, locate the load-volatile instructions that define
       interval and point metrics (potentially recording some information about
       them for later use); also find the padding region at the end of the
@@ -847,6 +895,19 @@
       trampoline executes the first instruction and then calls the Phase 3
       routine to instrument the function.]
 
+      Scratch that. I think this needs to be rephrased again to (assuming we
+      have only one pad region in the function body):
+
+      For each function, find the load-volatile instructions that define the
+      padded region so we know where it is.  Then, replace the first instruction
+      in the function w/ a branch down to the padded region. The padded region
+      contains an indirect branch to a dynamically-allocated body of code into
+      which the entire function body is copied.  Phase 2 then manipulates the
+      code in the copied region, replacing candidate load-volatiles w/ if/else
+      blocks that call the appropriate instrumentation function if the
+      load-volatile is actually an instrumentation function or executing the
+      original code otherwise.
+
   On phase 3 transformation function invocation:
 
       Performs all of tracecache-like magic, copying the original code to a
@@ -856,6 +917,47 @@
       accomplish this step, we must first determine how to make the branch- and
       call-maps that the TraceCache addTrace() routine(s) require, and how to
       otherwise use the existing tracecache stuff to accomplish what we want.
+
+}}}
+
+{{{ NEW PHASE DESCRIPTION
+
+Notes on using the total-copy approach in the prototype implementation.
+
+Note that we will need to use the total-copy approach as a "fall-back" from the
+dummy function (or padded region approach) in the following cases:
+
+a) The dummy function is outside the PC-relative distance
+
+b) The number of candidate instructions exceeds the fixed-size of dummy function
+(or of the padding region). This may happen fairly easily, it seems to me,
+esp. since we can't really estimate the frequency of our "load half-word, store
+byte pattern".
+
+Since the padding region has problems (sorta complex "determining the load
+instructions" heuristic, but worse, what happens with inlining?), we'd decided
+to go with the dummy function approach.
+
+However, the copy solution will still have to be used in a robust implementation
+as a fall-back, so I intend to implement that first.  Using the "copy-always"
+approach increases the initial overhead (which we don't *really* care about, but
+is important) and increases the memory footprint (but not the working-set
+size).  These seem like inefficiencies that aren't so egregious that they ought
+not exist in the prototype version.
+
+Before I do the heap-managed copy-always approach, however, I should look into
+the trace cache capabilities and see if there is anything there that will
+significantly help me out. It's sufficient to do a dummy-function-only prototype
+as well (for similar reasons), but it's not as flexible.  
+
+The tradeoff is a less general, more efficient implementation vs. a more
+general, less efficient implementation.  The most general, most efficient
+implementation may not be obtainable in the short term, but it's reasonable to
+try for.
+
+
+}}}
+
 }}}
 
 {{{ COMPLETED TODO ITEMS