[llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/lib/InstManip.h PhaseInfo.h Phases.cpp SparcInstManip.cpp SparcInstManip.h design.txt

Joel Stanley jstanley at cs.uiuc.edu
Sun May 18 12:46:00 PDT 2003


Changes in directory llvm/lib/Reoptimizer/Inst/lib:

InstManip.h updated: 1.17 -> 1.18
PhaseInfo.h updated: 1.7 -> 1.8
Phases.cpp updated: 1.31 -> 1.32
SparcInstManip.cpp updated: 1.11 -> 1.12
SparcInstManip.h updated: 1.11 -> 1.12
design.txt updated: 1.14 -> 1.15

---
Log message:

Phase-5 heap regions are working properly.


---
Diffs of the changes:

Index: llvm/lib/Reoptimizer/Inst/lib/InstManip.h
diff -u llvm/lib/Reoptimizer/Inst/lib/InstManip.h:1.17 llvm/lib/Reoptimizer/Inst/lib/InstManip.h:1.18
--- llvm/lib/Reoptimizer/Inst/lib/InstManip.h:1.17	Mon May 12 21:00:22 2003
+++ llvm/lib/Reoptimizer/Inst/lib/InstManip.h	Sun May 18 12:45:25 2003
@@ -54,7 +54,8 @@
     enum LogicalRegister {
         REG_0,
         REG_1,
-        REG_2
+        REG_2,
+        REG_7
     };
 
     // buildSlot - Fill the provided vector with the instructions that go into the slot
@@ -79,7 +80,7 @@
 
     virtual unsigned getSlotSize(Phase2* p2) const = 0;
     virtual unsigned getSlotSize(Phase3* p3, InstCandidate& cand) const = 0;
-    virtual unsigned getSlotSize(Phase4* p4, pp::GBTEntryType type) const = 0;
+    virtual unsigned getSlotSize(Phase4* p4) const = 0;
 
     // findCandidates - Build the vector of instruction candidates that occur in the
     // region defined by the given addresses. This is necessarily a platform-dependent


Index: llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h
diff -u llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h:1.7 llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h:1.8
--- llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h:1.7	Tue May 13 13:36:33 2003
+++ llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h	Sun May 18 12:45:25 2003
@@ -15,12 +15,13 @@
 
 #define DEBUG 1
 #if DEBUG
-#define VERBOSE 2
+// Maximum verbosity is level 5
+#define VERBOSE 4
 #define DEBUG_MSG(v, x) if(VERBOSE >= v) std::cerr << "[pp] " << x
 #define HEX(x) std::hex << x << std::dec
 #else
 #define DEBUG_MSG(v, x)
-#define HEX(x)
+#define HEX(x) ""
 #endif
 
 typedef std::pair<uint64_t, uint64_t> AddressRange;


Index: llvm/lib/Reoptimizer/Inst/lib/Phases.cpp
diff -u llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.31 llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.32
--- llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.31	Wed May 14 07:43:04 2003
+++ llvm/lib/Reoptimizer/Inst/lib/Phases.cpp	Sun May 18 12:45:25 2003
@@ -61,15 +61,12 @@
 #include "llvm/Reoptimizer/MemoryManager.h"
 #include "llvm/Reoptimizer/TraceCache.h"
 #include "llvm/Reoptimizer/VirtualMem.h"
+#include "llvm/Reoptimizer/InstrUtils.h"
 
 #include "InstManip.h"
 #include "PhaseInfo.h"
 #include "SparcInstManip.h"
 
-// tmp
-#include "llvm/Reoptimizer/InstrUtils.h"
-// tmp
-
 using std::vector;
 using std::cerr;
 using std::endl;
@@ -193,11 +190,14 @@
         if(m_excludeSet.find(i->first) == m_excludeSet.end()) {
             // Function is not in exclude set, so go ahead and transform it
 
-            DEBUG_MSG(1, "Transforming function " << i->first
-                      << "[" << HEX(i->second.first)
-                      << ", " << HEX(i->second.second) << "]...\n");
+            // FIXME XXX TODO Get rid of this 'if' statement
+            if(i->first == "fibs") {
+                DEBUG_MSG(1, "Transforming function " << i->first
+                          << "[" << HEX(i->second.first)
+                          << ", " << HEX(i->second.second) << "]...\n");
 
-            transformFunction(i->second);
+                transformFunction(i->second);
+            }
         }
     }
 
@@ -215,6 +215,7 @@
         vm->writeInstToVM(currAddr, *i);
         currAddr += im->getInstWidth();
     }
+    ::doFlush(slotBase, slotBase + im->getInstWidth() * snippet.size());
 }
 
 static uint64_t replaceInstWithBrToSlot(uint64_t srcAddr,
@@ -228,6 +229,7 @@
 
     // Replace instruction at srcAddr with branch to start of new slot
     tc->getVM()->writeInstToVM(srcAddr, im->getBranchAlways(slotBase, srcAddr));
+    ::doFlush(srcAddr, srcAddr + im->getInstWidth());
 
     return slotBase;
 }
@@ -267,8 +269,7 @@
 
     // Replace instruction at repInstAddr with a branch to start of a new slot.
     uint64_t slotBase = replaceInstWithBrToSlot(repInstAddr, slotSize, m_pTC, m_pIM);
-    //::doFlush(range.first, range.second);
-    
+
 #if 0
 #if VERBOSE > 3
     DEBUG_MSG(4, "printing function after writing branch-to-slot\n");
@@ -318,8 +319,8 @@
     // as well).
 
     m_pTC->getVM()->writeInstToVM(p3info->getReplaceAddr(), p3info->getOrigInst());
-//     ::doFlush(m_pPhase3Info->getReplaceAddr(),
-//               m_pPhase3Info->getReplaceAddr() + m_pIM->getInstWidth());
+    ::doFlush(m_pPhase3Info->getReplaceAddr(),
+              m_pPhase3Info->getReplaceAddr() + m_pIM->getInstWidth());
 }
 
 Phase3::~Phase3() 
@@ -447,12 +448,11 @@
 
 void Phase4::transform()
 {
-
 #if VERBOSE > 0
     dumpGBT(cerr);
 #endif
     DEBUG_MSG(1, "tag is " << HEX(m_tag) << ", and ");
-
+    
     if(GBTElem* gbte = searchGBT(m_tag)) {
         DEBUG_MSG(1, "matches.\n");
 
@@ -469,9 +469,9 @@
                 e = cand.getInsts().end(); i != e; ++i)
             vm->writeInstToVM(i->first, m_pIM->getNOP());
 
-        // Obtain memory (& rewrite branch) to the phase 5 slot.
+        // Obtain memory (& rewrite branch) to the phase 5 jump slot.
         
-        unsigned slotSize = m_pIM->getSlotSize(this, (pp::GBTEntryType) gbte->gbtType);
+        unsigned slotSize = m_pIM->getSlotSize(this);
         uint64_t repAddr = cand.front().first;
         uint64_t slotBase = replaceInstWithBrToSlot(repAddr, slotSize, m_pTC, m_pIM);
 


Index: llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp
diff -u llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp:1.11 llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp:1.12
--- llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp:1.11	Wed May 14 07:43:04 2003
+++ llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp	Sun May 18 12:45:26 2003
@@ -56,12 +56,23 @@
 #include <iostream>
 #include <iomanip>
 
+#include <sys/mman.h> // mprotect()
+
+#include <stdio.h>
+void perror(const char* s);
+#include <errno.h>
+int errno;
+
+#include <unistd.h> // valloc()
+
 #include "llvm/Reoptimizer/TraceCache.h"
 #include "llvm/Reoptimizer/VirtualMem.h"
 #include "llvm/Reoptimizer/MemoryManager.h"
 #include "llvm/Reoptimizer/BinInterface/sparc9.h"
 #include "llvm/Reoptimizer/BinInterface/bitmath.h"
 
+#include "llvm/Reoptimizer/InstrUtils.h"
+
 #include "SparcInstManip.h"
 #include "PhaseInfo.h"
 
@@ -91,6 +102,7 @@
     m_logicalToActualReg[REG_0] = R_O0;
     m_logicalToActualReg[REG_1] = R_O1;
     m_logicalToActualReg[REG_2] = R_O2;
+    m_logicalToActualReg[REG_7] = R_O7;
 
     // Populate output->input register map. This is SparcV9 specific and corresponds to
     // the register mapping that occurs after a 'save' instruction is issued. Shared and
@@ -180,53 +192,60 @@
                                std::vector<unsigned>& snippet)
 {
     unsigned sharedSize = WORD_WIDTH * getSharedSize();
-    unsigned stkSize = STKFRM_MIN + sharedSize + WORD_WIDTH * 2;
+
+    // The 'WORD_WIDTH * 3' below occurs because we need two words for saving the values of the
+    // two scratch registers, and one word for saving the return address in the jump slot.
+    unsigned stkSize = STKFRM_MIN + sharedSize + WORD_WIDTH * 3;
 
     if(stkSize % STACK_ALIGN != 0) {
-        // Pad up to next multiple of STACK_ALIGN; assumes STACK_ALIGN = 2 * WORD_WIDTH
+        // Pad up to next multiple of STACK_ALIGN; assumes STACK_ALIGN % WORD_WIDTH == 0
         stkSize += WORD_WIDTH;
         assert(stkSize % STACK_ALIGN == 0 && "Alignment adjustment failed");
     }
     
-    DEBUG_MSG(2, "buildStartSlot stack offset is " << stkSize << endl);
+    DEBUG_MSG(2, "buildSlot(p5) stack offset is " << stkSize << endl);
 
-    // For start interval sites, heap-allocate enough memory for the return value of the
-    // instrumentation function.
+    unsigned retAddrStkOff = STKFRM_MIN + sharedSize + 2 * WORD_WIDTH;
+    unsigned* heapSlot = buildPhase5HeapSlot(gbte, sharedSize, retAddrStkOff);
+
+    // If we're dealing with a start-interval instrumentation function, heap-allocate
+    // its parameter memory
 
     if(gbte->gbtType == pp::GBT_INTERVAL_START) {
         assert(!gbte->retVal && "Expected null retVal value");
         gbte->retVal = static_cast<void*>(new char[gbte->paramSize]);
     }
 
-    // After the alloca, our stack region looks like:
+    ////////////////
+    // Construct the phase 5 jump slot
+
+    startCode(snippet);
+
+    // After the SP sub, our stack region looks like:
     //     sp + BIAS + stkSize -> +--------------------------------+
-    //                            |       alignment padding        |
+    //                            | alignment padding (if needed)  |
+    //                            +--------------------------------+
+    //                            | ret addr for use by heap slot  | } WORD_WIDTH
     //                            +--------------------------------+
     //                            | save area for clobbered regs   | } WORD_WIDTH * 2
     //                            +--------------------------------+
     //                            | spill region for shared regs   | } sharedSize
     //  sp + BIAS + STKFRM_MIN -> +--------------------------------+
-    //  
-    
-    startCode(snippet);
+    //
 
     generateSPSub(stkSize);
 
-    // "Manually" save REG_0, REG_1
+    // Save registers REG_{0,7} so we don't have to issue a 'save'
     generateStackStore(REG_0, STKFRM_MIN + sharedSize);
-    generateStackStore(REG_1, STKFRM_MIN + sharedSize + WORD_WIDTH);
-
-    generateSpillShared(STKFRM_MIN);
-
-    generateLoad((uint64_t) gbte, REG_0, REG_1); // REG_0 live to call
-    generateStackStore(REG_0, PARAM_0);
+    generateStackStore(REG_7, STKFRM_MIN + sharedSize + WORD_WIDTH);
 
-    generateCall((uint64_t) &phase5, slotBase);
-    generateRestoreShared(STKFRM_MIN);
+    // Address of jmpl instruction will be in REG_7 at entry to heapSlot.
+    // This means that REG_7 is live until its use in heapSlot.
+    generateLdJmpl((uint64_t) heapSlot, REG_0, REG_7);
 
-    // "Manually" restore REG_0, REG_1
+    // Restore registers REG_{0,7} so we don't have to issue a 'restore'
     generateStackLoad(REG_0, STKFRM_MIN + sharedSize);
-    generateStackLoad(REG_1, STKFRM_MIN + sharedSize + WORD_WIDTH);
+    generateStackLoad(REG_7, STKFRM_MIN + sharedSize + WORD_WIDTH);
 
     generateBranchAlways(instAddr + getInstWidth(), slotBase, getSPSub(-stkSize));
 
@@ -268,27 +287,129 @@
         GEN_BRANCH_ALWAYS_SIZE;
 }
 
-unsigned SparcInstManip::getSlotSize(Phase4* p4, pp::GBTEntryType type) const
+unsigned SparcInstManip::getSlotSize(Phase4* p4) const
 {
     // The following sum corresponds to the sizes consumed by the various regions of the
-    // the slot constructed by phase 4, called the phase 5 slot. See ASCII diagram of
-    // phase 5 slot contents for details.
+    // slot constructed at phase 4, called the phase 5 jump slot. See the ASCII diagram of
+    // phase 5 jump slot contents for details.
 
     (void) p4;
 
-    return GEN_SPSUB_SIZE +
+    return
+        GEN_SPSUB_SIZE +
         GEN_STKSTORE_SIZE +
         GEN_STKSTORE_SIZE +
-        GEN_SPL_STK_SIZE +
-        GEN_LOAD_SIZE +
         GEN_STKSTORE_SIZE +
-        GEN_CALL_SIZE +
-        GEN_UNSPL_STK_SIZE +
+        GEN_LDJMPL_SIZE +
+        GEN_STKLOAD_SIZE +
         GEN_STKLOAD_SIZE +
         GEN_STKLOAD_SIZE +
         GEN_BRANCH_ALWAYS_SIZE;
 }
 
+static void dumpHeapSlot(unsigned* heapSlot,
+                         unsigned size,
+                         SparcInstManip* im)
+{
+    // Dump actual instructions in memory
+    unsigned* curr = heapSlot;
+    unsigned* end = heapSlot + size;
+    for(; curr < end; ++curr) {
+        cerr << "[pp] Instruction at: " << HEX(curr) << ": ";
+        im->printInst(*curr);
+        cerr << endl;
+    }
+    DEBUG_MSG(3, "Done with heap region construction, moving on to jump slot\n");
+}
+
+static void copySnippetToHeapSlot(vector<unsigned>& snippet,
+                                  unsigned* heapSlot,
+                                  unsigned size)
+{
+    unsigned* curr = heapSlot;
+    for(unsigned i = 0, e = snippet.size(); i < e; ++i)
+        *curr++ = snippet[i];
+    assert(curr == heapSlot + size && "Size mismatch on heap-slot copy");
+
+    // flush; unlikely that we need to flush here, but it's *possible*
+    ::doFlush((uint64_t) heapSlot, (uint64_t)(heapSlot + size));
+}
+
+static void setPageBits(unsigned* pageBase,
+                        unsigned sizeBytes,
+                        int protBits)
+{
+    DEBUG_MSG(3, "Setting access bits on heap slot page(s)" << endl);
+
+    int rc = mprotect(pageBase, sizeBytes, protBits);
+    if(rc < 0) {
+        perror("Unable to set permissions on pageBase");
+        assert(0 && "setPageBits failed");
+    }
+}
+
+unsigned* SparcInstManip::buildPhase5HeapSlot(GBTElem* gbte,
+                                              unsigned sharedSize,
+                                              unsigned retAddrStkOff)
+{
+    // Grab memory for the phase 5 heap slot -- this is not freed, intentionally.  NB: We
+    // use valloc here to ensure that the heap region is aligned on page boundaries.  This
+    // is probably not portable, but it'll do for now.
+
+    unsigned numHeapBytes = getPhase5HeapSize() * sizeof(unsigned);
+    unsigned* heapSlot = (unsigned*) valloc(numHeapBytes);
+
+    assert(sizeof(unsigned) == getInstWidth() && "Unexpected instruction width");
+    assert(heapSlot && "Failed to obtain memory for phase 5 heap slot");
+    DEBUG_MSG(2, "heapSlot address is " << HEX(heapSlot) << endl);
+
+    ////////////////
+    // Construct the phase 5 heap slot
+
+    // NB: We save the return address of the jmp slot before the call so that we do not
+    // need to make our own stack frame.
+
+    vector<unsigned> heapcode;
+    startCode(heapcode);
+
+    generateSpillShared(STKFRM_MIN);
+    generateStackStore(REG_7, retAddrStkOff);
+    generateLoad((uint64_t) gbte, REG_0, REG_7); // REG_0 live to call
+    generateStackStore(REG_0, PARAM_0);
+    generateCall((uint64_t) &phase5, (uint64_t) heapSlot);
+    generateStackLoad(REG_7, retAddrStkOff);
+    generateRestoreShared(STKFRM_MIN);
+    generateJmpl(REG_7, REG_0 /* scratch */, WORD_WIDTH);
+
+    endCode();
+
+    copySnippetToHeapSlot(heapcode, heapSlot, getPhase5HeapSize());
+    setPageBits(heapSlot, numHeapBytes, PROT_READ | PROT_EXEC);
+
+#if VERBOSE > 2
+    DEBUG_MSG(3, "Dumping contents of heap-slot memory...\n");
+    dumpHeapSlot(heapSlot, getPhase5HeapSize(), this);
+#endif
+
+    return heapSlot;
+}
+
+unsigned SparcInstManip::getPhase5HeapSize() const
+{
+    // The following sum corresponds to the sizes consumed by the various regions of the
+    // (heap-allocated) slot constructed during phase 4, called the phase 5 heap
+    // slot. See ASCII diagram of phase 5 heap slot contents for details.
+
+    return GEN_SPL_STK_SIZE +
+        GEN_STKSTORE_SIZE +
+        GEN_LOAD_SIZE +
+        GEN_STKSTORE_SIZE +
+        GEN_CALL_SIZE +
+        GEN_STKLOAD_SIZE +
+        GEN_UNSPL_STK_SIZE +
+        GEN_JMPL_SIZE;
+}
+
 void SparcInstManip::findCandidates(const std::pair<uint64_t, uint64_t>& range,
                                     vector<InstCandidate>& candidates) 
 {
@@ -350,7 +471,7 @@
     m_pCurrSnippet->push_back(MK_ADD_R_I(m_logicalToActualReg[reg], R_O6, offset + BIAS));
 
     assert(m_pCurrSnippet->size() - initSize == GEN_SPOFFSET_SIZE &&
-           "Unexpected number of instructions in code sequence for SP load");
+           "Unexpected number of instructions in code sequence for SP offset");
 }
 
 // generateSPSub - Generate code to allocate 'size' bytes on the stack
@@ -365,7 +486,7 @@
     m_pCurrSnippet->push_back(getSPSub(size));
 
     assert(m_pCurrSnippet->size() - initSize == GEN_SPSUB_SIZE &&
-           "Unexpected number of instructions in code sequence for SP add");
+           "Unexpected number of instructions in code sequence for SP sub");
 }
 
 void SparcInstManip::generateLoad(uint64_t value,
@@ -406,6 +527,45 @@
            "Unexpected number of instructions in code sequence for 64-bit value -> %dest");
 }
 
+void SparcInstManip::generateJmpl(LogicalRegister useForIndirect,
+                                     LogicalRegister returnAddrDest,
+                                     unsigned offset)
+{
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    unsigned initSize = m_pCurrSnippet->size();
+
+    m_pCurrSnippet->push_back(MK_JMPL_INDIRECT(m_logicalToActualReg[returnAddrDest],
+                                               m_logicalToActualReg[useForIndirect],
+                                               offset));
+
+    DEBUG_MSG(3, "JMPL instruction word is " << HEX(m_pCurrSnippet->back()) << endl);
+    m_pCurrSnippet->push_back(getNOP());
+
+    assert(m_pCurrSnippet->size() - initSize == GEN_JMPL_SIZE &&
+           "Unexpected number of instructions in code sequence for jmpl");
+}
+
+void SparcInstManip::generateLdJmpl(uint64_t destAddr,
+                                       LogicalRegister useForIndirect,
+                                       LogicalRegister returnAddrDest,
+                                       unsigned offset)
+{
+    // Generate code that loads the value of destAddr into useForIndirect, then jumps to
+    // [%useForIndirect + offset], storing the return address (i.e., the address of the
+    // jmpl instruction itself) in %returnAddrDest.
+    
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    unsigned initSize = m_pCurrSnippet->size();
+
+    // returnAddrDest is used as a scratch reg in the generateLoad
+    // After this call, %useForIndirect is live until after the jump
+    generateLoad(destAddr, useForIndirect, returnAddrDest);
+    generateJmpl(useForIndirect, returnAddrDest, offset);
+
+    assert(m_pCurrSnippet->size() - initSize == GEN_LDJMPL_SIZE &&
+           "Unexpected number of instructions in code sequence for ld & jmpl");
+}
+
 void SparcInstManip::generateAddressCopy(unsigned loadInst,
                                          LogicalRegister dest,
                                          bool afterSave)
@@ -453,7 +613,7 @@
     snippet.push_back(MK_LDX_STACK(m_logicalToActualReg[dest], BIAS + stkOffset));
 
     assert(snippet.size() - initSize == GEN_STKSTORE_SIZE &&
-           "Unexpected number of instructions in code sequence for parameter store");
+           "Unexpected number of instructions in code sequence for parameter load");
 }
 
 
@@ -509,7 +669,7 @@
            "Unexpected number of instructions in code sequence for restore");
 }
 
-void SparcInstManip::generateSave()
+void SparcInstManip::generateSave(unsigned offset)
 {
     assert(m_pCurrSnippet && "Invalid snippet for code generation");
     vector<unsigned>& snippet = *m_pCurrSnippet;
@@ -517,7 +677,7 @@
     unsigned initSize = snippet.size();    
 
     // save %sp, -176, %sp
-    snippet.push_back(MK_SAVE_IMM(R_O6, R_O6, -176));
+    snippet.push_back(MK_SAVE_IMM(R_O6, R_O6, -offset));
 
     assert(snippet.size() - initSize == GEN_SAVE_SIZE &&
            "Unexpected number of instructions in code sequence for save");
@@ -551,7 +711,7 @@
     ::generateRestoreShared(*m_pCurrSnippet, R_O6, WORD_WIDTH, offset + BIAS);
 
     assert(m_pCurrSnippet->size() - initSize == GEN_UNSPL_STK_SIZE &&
-           "Unexpected number of instructions in code sequence for spill to stack");
+           "Unexpected number of instructions in code sequence for unspill from stack");
 }
 
 void SparcInstManip::generateRestoreShared(uint64_t restoreFromAddr,


Index: llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h
diff -u llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h:1.11 llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h:1.12
--- llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h:1.11	Tue May 13 14:43:03 2003
+++ llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h	Sun May 18 12:45:26 2003
@@ -35,7 +35,7 @@
 
     virtual unsigned getSlotSize(Phase2* p2) const;
     virtual unsigned getSlotSize(Phase3* p3, InstCandidate& cand) const;
-    virtual unsigned getSlotSize(Phase4* p4, pp::GBTEntryType type) const;
+    virtual unsigned getSlotSize(Phase4* p4) const;
 
     virtual void     findCandidates(const std::pair<uint64_t, uint64_t>& range,
                                     std::vector<InstCandidate>& candidates);
@@ -51,6 +51,12 @@
     typedef std::map<unsigned, unsigned>               OutputToInputRegMap;
     typedef std::map<GBTElem*, std::vector<uint64_t> > GBTStackMap;
 
+    unsigned*        buildPhase5HeapSlot(GBTElem* gbte,
+                                         unsigned sharedSize,
+                                         unsigned retAddrStkOff);
+    
+    unsigned         getPhase5HeapSize() const;
+
     void             startCode(std::vector<unsigned>& snippet) { m_pCurrSnippet = &snippet; }
     void             endCode()                                 { m_pCurrSnippet = 0;        }
                      
@@ -66,6 +72,15 @@
     void             generateSPOffset(LogicalRegister reg, unsigned offset);
     void             generateSPSub(unsigned size);
 
+    void             generateJmpl(LogicalRegister useForIndirect,
+                                  LogicalRegister returnAddrDest,
+                                  unsigned offset = 0);
+
+    void             generateLdJmpl(uint64_t destAddr,
+                                    LogicalRegister useForIndirect,
+                                    LogicalRegister returnAddrDest,
+                                    unsigned offset = 0);
+    
     void             generateLoad(uint64_t value,
                                   LogicalRegister dest,
                                   LogicalRegister tmp);
@@ -73,7 +88,7 @@
     void             generateStackStore(LogicalRegister src, unsigned stkOffset);
     void             generateStackLoad(LogicalRegister dest, unsigned stkOffset);
     void             generateRestore();
-    void             generateSave();
+    void             generateSave(unsigned offset = STKFRM_MIN);
                      
     void             generateRestoreShared(uint64_t restoreFromAddr,
                                            LogicalRegister tmp1 = REG_0,
@@ -155,6 +170,8 @@
     static const unsigned GEN_UNSPL_STK_SIZE =     GEN_SPL_STK_SIZE;
     static const unsigned GEN_SPSUB_SIZE =         1;
     static const unsigned GEN_SPOFFSET_SIZE =      1;
+    static const unsigned GEN_JMPL_SIZE =          2;
+    static const unsigned GEN_LDJMPL_SIZE =        GEN_LOAD_SIZE + GEN_JMPL_SIZE;
 };
 
 unsigned SparcInstManip::getBranchAlways(uint64_t dest, uint64_t pc, bool annul) const


Index: llvm/lib/Reoptimizer/Inst/lib/design.txt
diff -u llvm/lib/Reoptimizer/Inst/lib/design.txt:1.14 llvm/lib/Reoptimizer/Inst/lib/design.txt:1.15
--- llvm/lib/Reoptimizer/Inst/lib/design.txt:1.14	Mon May 12 21:00:23 2003
+++ llvm/lib/Reoptimizer/Inst/lib/design.txt	Sun May 18 12:45:26 2003
@@ -1441,3 +1441,24 @@
 
 }}}
 
+{{{ Suggestion from Chris
+
+FYI: Chris suggested a very interesting idea regarding an alternative to the
+very hackish dummy function approach.  If we made a "custom" linker, we'd be
+able to rewrite the functions with sufficient padding at the end.  If this
+occurred, we wouldn't need the dummy function at all anymore -- phase 2 would
+simply write branches down into the padded region instead of into the dummy
+function, and we'd be guaranteed to be within the PC-relative maximum branch
+distance except in the (exceedingly rare) cases where the function size was
+*really* big (i.e. >= 64K).  The only problem here is that we don't always know
+how big the padded region has to be -- in reality, it'd have to be big enough to
+contain the resolve-candidate slot code for *each* candidate in the scanned
+code.  The candidates could be located by the "custom" linker, and the padded
+regions could be made to be big enough, but the executable growth has the
+potential to be significant.  This unknown-size issue isn't unique to this
+padded approach, however, and remains a problem in the dummy function anyway.
+Also, Chris remarked that any novel page-management mechanisms, etc., (for the
+code that is jumped to in newly-allocated pages) that I devise should perhaps be
+integrated into the LLVM JIT if they are suitable.
+
+}}}




