[llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/InstManip.cpp InstManip.h Phases.cpp design.txt

Joel Stanley jstanley at cs.uiuc.edu
Tue Apr 29 21:03:01 PDT 2003


Changes in directory llvm/lib/Reoptimizer/Inst:

InstManip.cpp updated: 1.8 -> 1.9
InstManip.h updated: 1.9 -> 1.10
Phases.cpp updated: 1.14 -> 1.15
design.txt updated: 1.10 -> 1.11

---
Log message:

* A logical -> actual register mapping mechanism exists
* Calling conventions are now adhered to
* Slots in the tracecache now obtain a new stack frame


---
Diffs of the changes:

Index: llvm/lib/Reoptimizer/Inst/InstManip.cpp
diff -u llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.8 llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.9
--- llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.8	Tue Apr 29 13:36:53 2003
+++ llvm/lib/Reoptimizer/Inst/InstManip.cpp	Tue Apr 29 21:08:42 2003
@@ -12,6 +12,9 @@
 #include "InstManip.h"
 
 const unsigned InstManip::NOP_INST = 0x01000000;
+const unsigned InstManip::BRANCH_ALWAYS_BASE = 0x10480000;
+const unsigned InstManip::BRANCH_ALWAYS_BASE_ANNUL = 0x30480000;
+const unsigned InstManip::BIAS = 2047;
 uint64_t InstManip::sm_phase3SpillRegion[InstManip::SHARED_SIZE];
 
 using std::cout;
@@ -43,7 +46,40 @@
     ostr << "}";
 }
 
-void InstManip::printRange(unsigned* start, unsigned* end) const
+InstManip::InstManip(VirtualMem* vm):
+    m_pVM(vm),
+    m_pCurrSnippet(0)
+{
+    assert(vm && "InstManip requires valid VirtualMem instance");
+
+    // Populate logical->actual register map. Since this InstManip class is
+    // SparcV9-specific, we map to the values used by the BinInterface library and macros.
+
+    m_logicalToActualReg[REG_0] = R_O0;
+    m_logicalToActualReg[REG_1] = R_O1;
+    m_logicalToActualReg[REG_2] = R_O2;
+
+    // Populate output->input register map. This is SparcV9 specific and corresponds to
+    // the register mapping that occurs after a 'save' instruction is issued. Shared and
+    // local registers map to themselves.
+
+    m_outputToInputReg[R_O0] = R_I0;
+    m_outputToInputReg[R_O1] = R_I1;
+    m_outputToInputReg[R_O2] = R_I2;
+    m_outputToInputReg[R_O3] = R_I3;
+    m_outputToInputReg[R_O4] = R_I4;
+    m_outputToInputReg[R_O5] = R_I5;
+    m_outputToInputReg[R_O6] = R_I6;
+    m_outputToInputReg[R_O7] = R_I7;
+
+    for(unsigned i = R_G0; i <= R_G7; ++i)
+        m_outputToInputReg[i] = i;
+    for(unsigned i = R_L0; i <= R_L7; ++i)
+        m_outputToInputReg[i] = i;
+}
+
+void InstManip::printRange(unsigned* start,
+                           unsigned* end) const
 {
     // Dumps contents (and corresponding disassembly) of memory range given by range
     // to stdout.  TODO: Parameterize by an ostream instance; cannot do this yet
@@ -75,57 +111,60 @@
 }
 
 void InstManip::generateLoad(uint64_t value,
-                             std::vector<unsigned>& snippet,
-                             TargetRegister reg) const
+                             LogicalRegister dest,
+                             LogicalRegister tmp)
 {
     // Loads the 64-bit 'value' into the actual register mapped to the logical register
     // 'dest', using the register mapped to 'tmp' as a scratch register (e.g., dest
     // REG_0 -> %o0, tmp REG_1 -> %o1). Instructions are appended to the current snippet.
 
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    assert(dest != tmp && "Distinct logical registers required");
+    std::vector<unsigned>& snippet = *m_pCurrSnippet;
+    
     unsigned initSize = snippet.size();
-    unsigned destReg, tmpReg;
-    switch(reg) {
-        case REG_0:
-            destReg = R_O0;
-            tmpReg = R_O1;
-            break;
-        case REG_1:
-            destReg = R_O1;
-            tmpReg = R_O2;
-            break;
-        default:
-            assert(0 && "Invalid destination register");
-    }
+    unsigned destReg = m_logicalToActualReg[dest];
+    unsigned tmpReg = m_logicalToActualReg[tmp];
     
-    // sethi (upper 22b of upper wrd), %o0
+    // sethi (upper 22b of upper wrd), %destReg
     snippet.push_back(MK_SETHI(destReg, HIGH22(HIGHWORD(value))));
 
-    // or %o0, (lower 10b of upper wrd), %o0
+    // or %o0, (lower 10b of upper wrd), %destReg
     snippet.push_back(MK_LOGIC_IMM(OP3_OR, destReg, destReg, LOW10(HIGHWORD(value))));
 
-    // sllx %o0, 32, %o0
+    // sllx %o0, 32, %destReg
     snippet.push_back(MK_SHIFTX(OP3_SLL, destReg, destReg, 32));
 
-    // sethi (upper 22b of lwr wrd), %o1
+    // sethi (upper 22b of lwr wrd), %tmpReg
     snippet.push_back(MK_SETHI(tmpReg, HIGH22(LOWWORD(value))));
 
-    // or %o0, %o1, %o0
+    // or %destReg, %tmpReg, %destReg
     snippet.push_back(MK_LOGIC(OP3_OR, destReg, destReg, tmpReg));
 
-    // add %o0, (lwr 10b of lwr wrd), %o0
+    // add %destReg, (lwr 10b of lwr wrd), %destReg
     snippet.push_back(MK_ADD_R_I(destReg, destReg, LOW10(LOWWORD(value))));
 
     assert(snippet.size() - initSize == getGenLoadSize() &&
-           "Unexpected number of instructions in code sequence for 64-bit value -> %destReg");
+           "Unexpected number of instructions in code sequence for 64-bit value -> %dest");
 }
 
 void InstManip::generateAddressCopy(unsigned loadInst,
-                                    std::vector<unsigned>& snippet,
-                                    TargetRegister reg) const
+                                    LogicalRegister dest,
+                                    bool afterSave)
 {
-    unsigned destReg = (reg == REG_0) ? R_O0 : R_O1;
+    // NB: After save instruction has been issued, the output registers are mapped to the
+    // input registers.  
+
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
+    unsigned initSize = snippet.size();
+    unsigned destReg = m_logicalToActualReg[dest];
     unsigned rs1 = RD_FLD(loadInst, INSTR_RS1);
-    
+
+    if(afterSave)
+        rs1 = m_outputToInputReg[rs1];
+
     if(RD_FLD(loadInst, INSTR_I)) {
         // Case 1: load is immediate-valued --> reg, imm value add instruction needed
         unsigned imm = RD_FLD(loadInst, INSTR_SIMM13);
@@ -134,14 +173,38 @@
     else {
         // Case 2: load is register-valued --> reg, reg add instruction needed
         unsigned rs2 = RD_FLD(loadInst, INSTR_RS2);
+
+        if(afterSave)
+            rs2 = m_outputToInputReg[rs2];
+        
         snippet.push_back(MK_ADD_R_R(destReg, rs1, rs2));
     }
+
+    assert(snippet.size() - initSize == getGenAddressCopySize(loadInst) &&
+           "Unexpected number of instructions in code sequence for address copy");
+}
+
+void InstManip::generateParamStore(LogicalRegister src,
+                                   StackOffset off)
+{
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
+    unsigned initSize = snippet.size();
+    unsigned srcReg = m_logicalToActualReg[src];
+
+    snippet.push_back(MK_STX_STACK(srcReg, BIAS + off));
+
+    assert(snippet.size() - initSize == getGenParamStoreSize() &&
+           "Unexpected number of instructions in code sequence for parameter store");
 }
 
 void InstManip::generateCall(uint64_t dest,
-                             uint64_t slotBase,
-                             std::vector<unsigned>& snippet) const
+                             uint64_t slotBase)
 {
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
     unsigned initSize = snippet.size();
     
     // Calculate address of call instruction from slotBase
@@ -155,92 +218,105 @@
            "Unexpected number of instructions in code sequence for call");
 }
 
-// NB: Generate restore/save currently fill the snippet (which comes from a slot) with a
-// bunch of code to save and restore the global registers.  This blows up the size of the
-// required slot quite a bit -- it would be better to generate a call to functions
-// saveGlobalRegs() and restoreGlobalRegs(), for example.  However, this works for now and
-// writing those functions means determining what the inline assembly should look like.
-// The ifdef'd-out region below is a start, but it is incomplete and generates errors at
-// assembly time. In particular, the SPARC assembly requires a '.register' directive before
-// it witnesses a use of %g2, %g3, %g6, or %g7, and that doesn't appear to be emitted simply
-// by using the inline assembly. :( TODO.
-//
-
-#if 0
-void restoreGlobRegs()
-{
-    // asm ("assembly template" : "output contraints", "input contraints")
-    // Restore the global registers %g[1-7] from the globalRegs array.
-    
-    asm("ldx %0, %%g1"::"o" (globalRegs));
-    asm("ldx %0, %%g2"::"o" (globalRegs+1));
-    asm("ldx %0, %%g3"::"o" (globalRegs+2));
-    asm("ldx %0, %%g4"::"o" (globalRegs+3));
-    asm("ldx %0, %%g5"::"o" (globalRegs+4));
-    asm("ldx %0, %%g6"::"o" (globalRegs+5));
-    asm("ldx %0, %%g7"::"o" (globalRegs+6));
+unsigned InstManip::getRestoreInst() const
+{
+    // restore %g0, 0, %g0
+    return MK_RESTORE_IMM(R_G0, R_G0, 0);
 }
-#endif
 
-void InstManip::generateRestoreShared(uint64_t restoreFromAddr,
-                                      std::vector<unsigned>& snippet,
-                                      TargetRegister reg) const 
+void InstManip::generateRestore()
 {
-    generateLoad(restoreFromAddr, snippet, reg);
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    std::vector<unsigned>& snippet = *m_pCurrSnippet;
 
-    unsigned destReg = (reg == REG_0) ? R_O0 : R_O1;
-    
-    snippet.push_back(MK_LOAD_IMM(R_G1, destReg, 8));
-    snippet.push_back(MK_LOAD_IMM(R_G2, destReg, 16));
-    snippet.push_back(MK_LOAD_IMM(R_G3, destReg, 24));
-    snippet.push_back(MK_LOAD_IMM(R_G4, destReg, 32));
-    snippet.push_back(MK_LOAD_IMM(R_G5, destReg, 40));
-    snippet.push_back(MK_LOAD_IMM(R_G6, destReg, 48));
-    snippet.push_back(MK_LOAD_IMM(R_G7, destReg, 56));
+    unsigned initSize = snippet.size();
+
+    snippet.push_back(getRestoreInst());
+
+    assert(snippet.size() - initSize == getGenRestoreSize() &&
+           "Unexpected number of instructions in code sequence for restore");
 }
 
-void InstManip::generateRestore(std::vector<unsigned>& snippet) const
+void InstManip::generateSave()
 {
-    // restore %o0, 0, %o0
-    snippet.push_back(MK_RESTORE_IMM(R_O0, R_O0, 0));
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
+    unsigned initSize = snippet.size();    
+
+    // save %sp, -176, %sp
+    snippet.push_back(MK_SAVE_IMM(R_O6, R_O6, -176));
+
+    assert(snippet.size() - initSize == getGenSaveSize() &&
+           "Unexpected number of instructions in code sequence for save");
 }
 
-void InstManip::generateSpillShared(uint64_t spillToAddr,
-                                    std::vector<unsigned>& snippet,
-                                    TargetRegister reg) const 
+// TODO: It will be worthwhile to generate calls to functions that spill/restore the
+// shared registers instead of dumping all of the code into the current snippet.
+
+void InstManip::generateRestoreShared(uint64_t restoreFromAddr,
+                                      LogicalRegister tmp1,
+                                      LogicalRegister tmp2) 
 {
-    generateLoad(spillToAddr, snippet, reg);
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    assert(tmp1 != tmp2 && "Distinct logical registers required");
+
+    std::vector<unsigned>& snippet = *m_pCurrSnippet;
+    unsigned initSize = snippet.size();
+    unsigned tmpReg = m_logicalToActualReg[tmp1];
 
-    unsigned destReg = (reg == REG_0) ? R_O0 : R_O1;
+    generateLoad(restoreFromAddr, tmp1, tmp2);
+    snippet.push_back(MK_LOAD_IMM(R_G1, tmpReg, 8));
+    snippet.push_back(MK_LOAD_IMM(R_G2, tmpReg, 16));
+    snippet.push_back(MK_LOAD_IMM(R_G3, tmpReg, 24));
+    snippet.push_back(MK_LOAD_IMM(R_G4, tmpReg, 32));
+    snippet.push_back(MK_LOAD_IMM(R_G5, tmpReg, 40));
+    snippet.push_back(MK_LOAD_IMM(R_G6, tmpReg, 48));
+    snippet.push_back(MK_LOAD_IMM(R_G7, tmpReg, 56));
 
-    snippet.push_back(MK_STORE_IMM(R_G1, destReg, 8));
-    snippet.push_back(MK_STORE_IMM(R_G2, destReg, 16));
-    snippet.push_back(MK_STORE_IMM(R_G3, destReg, 24));
-    snippet.push_back(MK_STORE_IMM(R_G4, destReg, 32));
-    snippet.push_back(MK_STORE_IMM(R_G5, destReg, 40));
-    snippet.push_back(MK_STORE_IMM(R_G6, destReg, 48));
-    snippet.push_back(MK_STORE_IMM(R_G7, destReg, 56));
+    assert(snippet.size() - initSize == getGenRestoreSharedSize() &&
+           "Unexpected number of instructions in code sequence for restore shared");
 }
 
-void InstManip::generateSave(std::vector<unsigned>& snippet) const
+void InstManip::generateSpillShared(uint64_t spillToAddr,
+                                    LogicalRegister tmp1,
+                                    LogicalRegister tmp2) 
 {
-    // save %o0, 0, %o0
-    snippet.push_back(MK_SAVE_IMM(R_O0, R_O0, 0));
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    assert(tmp1 != tmp2 && "Distinct logical registers required");
+
+    std::vector<unsigned>& snippet = *m_pCurrSnippet;
+    unsigned initSize = snippet.size();    
+    unsigned tmpReg = m_logicalToActualReg[tmp1];
+
+    generateLoad(spillToAddr, tmp1, tmp2);
+    snippet.push_back(MK_STORE_IMM(R_G1, tmpReg, 8));
+    snippet.push_back(MK_STORE_IMM(R_G2, tmpReg, 16));
+    snippet.push_back(MK_STORE_IMM(R_G3, tmpReg, 24));
+    snippet.push_back(MK_STORE_IMM(R_G4, tmpReg, 32));
+    snippet.push_back(MK_STORE_IMM(R_G5, tmpReg, 40));
+    snippet.push_back(MK_STORE_IMM(R_G6, tmpReg, 48));
+    snippet.push_back(MK_STORE_IMM(R_G7, tmpReg, 56));
+
+    assert(snippet.size() - initSize == getGenSpillSharedSize() &&
+           "Unexpected number of instructions in code sequence for spill shared");
 }
 
 void InstManip::generateBranchAlways(uint64_t dest,
                                      uint64_t slotBase,
-                                     std::vector<unsigned>& snippet,
-                                     bool annul) const
+                                     unsigned delaySlotInstr)
 {
+    assert(m_pCurrSnippet && "Invalid snippet for code generation");
+    std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
     unsigned initSize = snippet.size();
     
     // Calculate address of branch instruction from slotBase
     uint64_t branchInstAddr = slotBase + getInstWidth() * snippet.size();
 
-    // Add branch instruction and nop (for branch delay slot) to code snippet.
-    snippet.push_back(getBranchAlways(dest, branchInstAddr, annul));
-    snippet.push_back(NOP_INST);
+    // Add branch instruction and the specified delay slot instruction to code snippet.
+    snippet.push_back(getBranchAlways(dest, branchInstAddr, false)); // annul bit low
+    snippet.push_back(delaySlotInstr);
 
     assert(snippet.size() - initSize == getGenBranchAlwaysSize() &&
            "Unexpected number of instruction in code sequence for branch-always");


Index: llvm/lib/Reoptimizer/Inst/InstManip.h
diff -u llvm/lib/Reoptimizer/Inst/InstManip.h:1.9 llvm/lib/Reoptimizer/Inst/InstManip.h:1.10
--- llvm/lib/Reoptimizer/Inst/InstManip.h:1.9	Tue Apr 29 13:36:53 2003
+++ llvm/lib/Reoptimizer/Inst/InstManip.h	Tue Apr 29 21:08:42 2003
@@ -77,15 +77,24 @@
 class InstManip 
 {
   public:
-    InstManip(VirtualMem* vm): m_pVM(vm)
-    {
-        assert(vm && "InstManip requires valid VirtualMem instance");
-    }
+    InstManip(VirtualMem* vm);
 
     typedef std::pair<uint64_t, unsigned> Inst; // (location, inst word) pair
     
-    enum TargetRegister { REG_0, REG_1 };
-
+    // Logical registers used by clients of this class, mapped to machine-specific IDs
+    // by the logical -> actual register map.
+    enum LogicalRegister {
+        REG_0,
+        REG_1,
+        REG_2
+    };
+
+    // Offsets in stack frame for function parameters
+    enum StackOffset {
+        PARAM_0 = 128,
+        PARAM_1 = 136
+    }; 
+    
     void            printRange(unsigned* start, unsigned* end) const;
     inline void     printRange(uint64_t start, uint64_t end) const;
                     
@@ -94,38 +103,41 @@
                     
     uint64_t        skipFunctionHdr(uint64_t addr) const;
                     
+    void            startCode(std::vector<unsigned>& snippet) { m_pCurrSnippet = &snippet; }
+    void            endCode()                                 { m_pCurrSnippet = 0;        }
+
     void            generateAddressCopy(unsigned loadInst,
-                                        std::vector<unsigned>& snippet,
-                                        TargetRegister reg = REG_0) const;
+                                        LogicalRegister dest,
+                                        bool afterSave);
+
+    void            generateBranchAlways(uint64_t dest,
+                                         uint64_t slotBase,
+                                         unsigned delaySlotInstr = NOP_INST);
+
+    void            generateCall(uint64_t dest, uint64_t slotBase);
 
     void            generateLoad(uint64_t value,
-                                 std::vector<unsigned>& snippet,
-                                 TargetRegister reg = REG_0) const;
+                                 LogicalRegister dest,
+                                 LogicalRegister tmp);
 
-    void            generateCall(uint64_t dest,
-                                 uint64_t slotBase,
-                                 std::vector<unsigned>& snippet) const;
+    void            generateParamStore(LogicalRegister src, StackOffset off);
 
-    void            generateRestore(std::vector<unsigned>& snippet) const;
-    void            generateSave(std::vector<unsigned>& snippet) const;
+    void            generateRestore();
+    void            generateSave();
 
-    void            generateSpillShared(uint64_t spillFromAddr,
-                                        std::vector<unsigned>& snippet,
-                                        TargetRegister reg = REG_0) const;
-    
-    void            generateRestoreShared(uint64_t restorFromAddr,
-                                          std::vector<unsigned>& snippet,
-                                          TargetRegister reg = REG_0) const;
+    void            generateRestoreShared(uint64_t restoreFromAddr,
+                                          LogicalRegister tmp1 = REG_0,
+                                          LogicalRegister tmp2 = REG_1);
 
-    void            generateBranchAlways(uint64_t dest,
-                                         uint64_t slotBase,
-                                         std::vector<unsigned>& snippet,
-                                         bool annul = true) const;
+    void            generateSpillShared(uint64_t spillFromAddr,
+                                        LogicalRegister tmp1 = REG_0,
+                                        LogicalRegister tmp2 = REG_1);
 
     void            findCandidates(uint64_t start,
                                    uint64_t end,
                                    std::vector<InstCandidate>& candidates);
 
+    unsigned        getRestoreInst() const;
     inline unsigned getBranchAlways(uint64_t dest, uint64_t pc, bool annulHigh = true) const;
     inline unsigned getCallInst(uint64_t dest, uint64_t pc) const;
     inline bool     isBranch(unsigned inst) const;
@@ -138,18 +150,21 @@
     unsigned        getGenCallSize() const           { return 2;                              }
     unsigned        getGenBranchAlwaysSize() const   { return 2;                              }
     unsigned        getGenSaveSize() const           { return 1;                              }
+    unsigned        getGenParamStoreSize() const     { return 1;                              }
     unsigned        getGenSpillSharedSize() const    { return getGenLoadSize() + SHARED_SIZE; }
     unsigned        getGenRestoreSharedSize() const  { return getGenLoadSize() + SHARED_SIZE; }
     unsigned        getGenRestoreSize() const        { return 1;                              }
     unsigned        getInstWidth() const             { return 4;                              }
     unsigned        getSharedSize() const            { return SHARED_SIZE;                    }
 
-    inline unsigned getAddressCopySize(unsigned loadInst) const;
+    inline unsigned getGenAddressCopySize(unsigned loadInst) const;
 
     uint64_t getPhase3SpillAddr() { return (uint64_t) sm_phase3SpillRegion; }
 
   private:
     InstManip() {}
+    typedef std::map<LogicalRegister, unsigned> LogicalToActualRegMap;
+    typedef std::map<unsigned, unsigned>        OutputToInputRegMap;
 
     bool            isCandidateLoad(uint64_t addr,
                                     uint64_t end,
@@ -168,22 +183,33 @@
                                       uint64_t end,
                                       unsigned fpOffset);
     
-    // Branch-always (annul bit high) instruction base (i.e. address not filled in yet)
-    static const unsigned BRANCH_ALWAYS_BASE = 0x30480000;
+    VirtualMem*            m_pVM;
+    std::vector<unsigned>* m_pCurrSnippet;       
+    LogicalToActualRegMap  m_logicalToActualReg; // Maps logical -> actual register 
+    OutputToInputRegMap    m_outputToInputReg;   // Maps output register -> input register
+
+    // Branch-always (annul bit high) instruction base (i.e., address not filled in yet)
+    static const unsigned BRANCH_ALWAYS_BASE_ANNUL;
+
+    // Branch-always (annul bit low) instruction base (i.e., address not filled in yet)
+    static const unsigned BRANCH_ALWAYS_BASE;
+
+    // NOP instruction
     static const unsigned NOP_INST;
 
     // Size (in number of 64-bit words) required for storing shared registers
     static const unsigned SHARED_SIZE = 7;
 
-    VirtualMem* m_pVM;
-    
+    // Sparc-specific constant used in SP manipulations
+    static const unsigned BIAS;
+
     // Memory region into which to spill shared registers when executing a phase 4 slot
     // (i.e., the slot that invokes the phase4 function, the slot written by phase 3
     // invocations).  NB: One region is sufficient and we do not need stack semantics
     // because only one activation of a phase 4 slot ever occurs at a given time (assuming
     // single-threaded execution).
 
-    static uint64_t sm_phase3SpillRegion[SHARED_SIZE];
+    static uint64_t                     sm_phase3SpillRegion[SHARED_SIZE];
 };
 
 void InstManip::printRange(uint64_t start, uint64_t end) const
@@ -209,8 +235,9 @@
     // branch instruction is executed (i.e., the address of the branch instruction). NB:
     // branch instruction is executed (i.e., the address of the branch instruction). NB:
     // Handles both the annul-high and annul-low branch-always variants.
 
-    assert(annul && "Unhandled case: annul bit low");
-    return getUndepJumpInstr(BRANCH_ALWAYS_BASE, dest, pc);
+    return getUndepJumpInstr(annul ? BRANCH_ALWAYS_BASE_ANNUL : BRANCH_ALWAYS_BASE,
+                             dest,
+                             pc);
 }
 
 unsigned InstManip::getCallInst(uint64_t dest, uint64_t pc) const
@@ -227,7 +254,7 @@
     return ::isBranchInstr(inst);
 }
 
-unsigned InstManip::getAddressCopySize(unsigned loadInst) const
+unsigned InstManip::getGenAddressCopySize(unsigned loadInst) const
 {
     // Determine the number of instructions required to load the address value used by the
     // load instruction into some register.


Index: llvm/lib/Reoptimizer/Inst/Phases.cpp
diff -u llvm/lib/Reoptimizer/Inst/Phases.cpp:1.14 llvm/lib/Reoptimizer/Inst/Phases.cpp:1.15
--- llvm/lib/Reoptimizer/Inst/Phases.cpp:1.14	Tue Apr 29 14:48:29 2003
+++ llvm/lib/Reoptimizer/Inst/Phases.cpp	Tue Apr 29 21:08:42 2003
@@ -16,18 +16,18 @@
 //           slot (annulling bit should specify *not* to execute the branch delay slot) in
 //           the dummy function.
 //
-// 	     2b. In the new slot, write the contents of the phase 2 slot:
-//                        +------------------------------+
-//                        | load parameter for phase 3   |
-//                        |       call to phase 3        |
-//                        |            nop               |
-//                        |    branch back to orig code  |
-//                        |            nop               |
-//                        +------------------------------+
+// 	     2b. In the new slot, write the contents of the phase 3 slot:
+//                        +------------------------------------+
+//                        |   save registers (new stack frame) |
+//                        |     load parameter for phase 3     |
+//                        |           call to phase 3          |
+//                        |                nop                 |
+//                        |        branch back to orig code    |
+//                        |          restore registers         |
+//                        +------------------------------------+
//               where the parameter to phase 3 is a pointer to the heap-allocated Phase3Info
 //               instance.
 //
-//
 // PHASE 3:
 //
 //       - Deallocate the parameter structure whenever it is convenient to do so.
@@ -40,18 +40,17 @@
 //       3. For each load-volatile candidate,
 //         3a. Obtain a new slot in the dummy function.
 //         3b. Replace the load candidate with branch to slot.
-//         3c. In the new slot, write the contents of the phase 3 slot:
+//         3c. In the new slot, write the contents of the phase 4 slot:
 //                  +---------------------------------------+
-//                  |             save registers            |
+//                  |    save registers (new stack frame)   |
 //                  |           save global registers       |
 //                  | copy load-src addr to param1 register |
 //                  | load p4 struct ptr to param2 register |
 //                  |             call to phase 4           |
 //                  |                  nop                  |
-//                  |             restore registers         |
 //                  |        restore global registers       |
 //                  |          branch back to orig code     |
-//                  |                  nop                  |
+//                  |           restore registers           |
 //                  +---------------------------------------+
 //
 //       4. Deallocate the slot that originated this invocation.
@@ -62,21 +61,11 @@
 //        1a. If tag is in GBT, we have a valid candidate, so do step 2.
 //        1b. If tag is not in GBT, our candidate is invalid, so delete slot and return to
 //        original code.
-//        
-//      2. Set up the second phase 4 slot that will actually call the instrumentation function:
+//
+//      2. Set up the phase 5 slot that will actually call the instrumentation function:
 //                  +---------------------------------------+
-//                  |             save registers            |
-//                  |           save global registers       |
-//                  |           call to inst func           |
-//                  |                  nop                  |
-//                  |             restore registers         |
-//                  |        restore global registers       |
-//                  |          branch back to orig code     |
-//                  |                  nop                  |
+//                  |                  ...                  |
 //                  +---------------------------------------+
-//      This "instrumentation slot" may have to be expanded later to store the return value
-//      in an alloca'd temporary, unless the phase4 function itself can invoke the
-//      instrumentation function, would be *highly* ideal.
 //
 
 #include <stdlib.h>
@@ -285,8 +274,9 @@
     std::string funcName;
     AddressRange range;
 
-    // TODO: Come up with a better way to do this that doesn't involve storing the entire
-    // list of functions here -- this could be quite large.
+    // Obtain the list of functions to transform, from the ElfReader module.  TODO: Come
+    // up with a better way to do this that doesn't involve storing the entire list of
+    // functions here -- this could be quite large.
     
     vector<std::pair<std::string, AddressRange> > funcs;
     while(elfReader.GetNextFunction(funcName, range))
@@ -294,6 +284,13 @@
 
     cerr << "There are " << funcs.size() << " functions to process." << endl << endl;
 
+    // Heap-allocate a region of memory in which to spill shared registers before phase3
+    // invocations.  We allocate one unit of space (given by InstManip::getSharedSize())
+    // for each function that we transform.
+
+    sm_pSpillRegion = new uint64_t[m_instManip.getSharedSize() * funcs.size()];
+    sm_pCurrSpill = sm_pSpillRegion;
+
     for(vector<std::pair<std::string, AddressRange> >::iterator i = funcs.begin(),
             e = funcs.end(); i != e; ++i) {
         if(i->first == "fibs") {
@@ -334,17 +331,34 @@
            "Unhandled case: branch instruction first in function body");
     vm->writeInstToVM(repInstAddr, m_instManip.getBranchAlways(slotBase, repInstAddr));
 
-    // Generate a) code to load the address of the heap-allocated Phase3Info struct into a
-    // register, which will be used as a parameter to the phase3 call, b) the call to
-    // phase 3 itself, and c) the direct branch back to the original code.
+    // Generate the phase 3 slot. See picture of phase 3 slot contents for more info.
 
     Phase3Info* p3info = new Phase3Info(range, origInst, repInstAddr,
                                         slotBase, getSlotSize(), m_pTraceCache);
 
     vector<unsigned> snippet;
-    m_instManip.generateLoad((uint64_t) p3info, snippet);
-    m_instManip.generateCall((uint64_t) &phase3, slotBase, snippet);
-    m_instManip.generateBranchAlways(repInstAddr, slotBase, snippet);
+    m_instManip.startCode(snippet);
+
+    m_instManip.generateSave();
+    m_instManip.generateSpillShared((uint64_t) sm_pCurrSpill);
+    m_instManip.generateLoad((uint64_t) p3info, InstManip::REG_0, InstManip::REG_1);
+    m_instManip.generateCall((uint64_t) &phase3, slotBase);
+    m_instManip.generateRestoreShared((uint64_t) sm_pCurrSpill);
+    m_instManip.generateBranchAlways(repInstAddr, slotBase, m_instManip.getRestoreInst());
+
+    m_instManip.endCode();
+
+    // Dump snippet instructions:
+    cerr << "phase3 slot instructions:" << endl;
+    for(vector<unsigned>::iterator j = snippet.begin(), k = snippet.end(); j != k; ++j) {
+        m_instManip.printInst(*j);
+        cerr << endl;
+    }
+
+    // Bump the current spill pointer to the next "spill slot" in the spill region used
+    // before/after phase3() invocations.
+
+    sm_pCurrSpill += m_instManip.getSharedSize();
 
     // Copy the snippet code into the slot
     assert(snippet.size() == getSlotSize() && "Snippet size does not match slot size");
@@ -356,8 +370,11 @@
     // The following sum corresponds to the sizes consumed by the various regions of the
     // phase 2 slot.  See picture of phase 2 contents for details.
 
-    return m_instManip.getGenLoadSize() +
+    return m_instManip.getGenSaveSize() +
+        m_instManip.getGenSpillSharedSize() +
+        m_instManip.getGenLoadSize() +
         m_instManip.getGenCallSize() +
+        m_instManip.getGenRestoreSharedSize() +
         m_instManip.getGenBranchAlwaysSize();
 }
 
@@ -430,25 +447,32 @@
         uint64_t slotBase = replaceInstWithBrToSlot(i->front().first, getSlotSize(*i),
                                                     m_pTraceCache, m_instManip);
 
-        // Generate a) code to save the registers, b) instruction(s) to store the load
-        // source address into a phase4 parameter register, c) the load of (the
-        // pointer-to) the heap-allocated Phase4Info structure into a phase4 parameter
-        // register, and d) code to call phase 3, restore regs, and branch back to
-        // original code.
+        // Generate the phase 4 slot. See picture of phase 4 slot contents for more info.
 
         Phase4Info* p4info = new Phase4Info(*i, slotBase, getSlotSize(*i), m_pTraceCache);
 
         uint64_t spillAddr = m_instManip.getPhase3SpillAddr();
-        
+
         vector<unsigned> snippet;
-        m_instManip.generateSave(snippet);
-        m_instManip.generateAddressCopy(i->front().second, snippet); // Uses InstManip::REG_0, live to call
-        m_instManip.generateSpillShared(spillAddr, snippet, InstManip::REG_1);
-        m_instManip.generateLoad((uint64_t) p4info, snippet, InstManip::REG_1);
-        m_instManip.generateCall((uint64_t) &phase4, slotBase, snippet);
-        m_instManip.generateRestoreShared(spillAddr, snippet);
-        m_instManip.generateRestore(snippet);
-        m_instManip.generateBranchAlways(i->front().first, slotBase, snippet);
+        m_instManip.startCode(snippet);
+
+        // NB: We pass parameters to the phase4 function in REG_0 and REG_1 on the
+        // assumption that the input parameters will be looked for there. However, it is
+        // possible that the input parameters will be taken from the parameter array at
+        // fixed offsets from the stack pointer.  Hence, we store the parameters there as
+        // well.
+        
+        m_instManip.generateSave();
+        m_instManip.generateAddressCopy(i->front().second, InstManip::REG_0, true);      // REG_0 live to call
+        m_instManip.generateParamStore(InstManip::REG_0, InstManip::PARAM_0);
+        m_instManip.generateSpillShared(spillAddr, InstManip::REG_1, InstManip::REG_2);
+        m_instManip.generateLoad((uint64_t) p4info, InstManip::REG_1, InstManip::REG_2); // REG_1 live to call
+        m_instManip.generateParamStore(InstManip::REG_1, InstManip::PARAM_1);
+        m_instManip.generateCall((uint64_t) &phase4, slotBase);
+        m_instManip.generateRestoreShared(spillAddr);
+        m_instManip.generateBranchAlways(i->front().first, slotBase, m_instManip.getRestoreInst());
+
+        m_instManip.endCode();
 
         // Dump snippet instructions:
 
@@ -474,12 +498,13 @@
     // phase 3 slot.  See picture of phase 3 contents for details.
 
     return m_instManip.getGenSaveSize() +
-        m_instManip.getAddressCopySize(cand.front().second) +
+        m_instManip.getGenAddressCopySize(cand.front().second) +
+        m_instManip.getGenParamStoreSize() +
         m_instManip.getGenSpillSharedSize() +
         m_instManip.getGenLoadSize() +
+        m_instManip.getGenParamStoreSize() +
         m_instManip.getGenCallSize() +
         m_instManip.getGenRestoreSharedSize() +
-        m_instManip.getGenRestoreSize() +
         m_instManip.getGenBranchAlwaysSize();
 }
 


Index: llvm/lib/Reoptimizer/Inst/design.txt
diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.10 llvm/lib/Reoptimizer/Inst/design.txt:1.11
--- llvm/lib/Reoptimizer/Inst/design.txt:1.10	Tue Apr 29 13:36:53 2003
+++ llvm/lib/Reoptimizer/Inst/design.txt	Tue Apr 29 21:08:42 2003
@@ -896,6 +896,10 @@
 
     - Write phase 5 slot generation code, phase 5 function itself, etc.
 
+    - Optimizations:
+        - No need to save registers (other than those clobbered) in phase 3 slot, since phase 3
+          is invoked at the start of the function. Must still spill/restore shared, though.
+
 }}}
 
 {{{ PHASE OUTLINE





More information about the llvm-commits mailing list