[llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/InstManip.cpp InstManip.h Phases.cpp design.txt
Joel Stanley
jstanley at cs.uiuc.edu
Tue Apr 29 21:03:01 PDT 2003
Changes in directory llvm/lib/Reoptimizer/Inst:
InstManip.cpp updated: 1.8 -> 1.9
InstManip.h updated: 1.9 -> 1.10
Phases.cpp updated: 1.14 -> 1.15
design.txt updated: 1.10 -> 1.11
---
Log message:
* A logical -> actual register mapping mechanism exists
* Calling conventions are now adhered to
* Slots in the tracecache now obtain a new stack frame
---
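
Aside: the logical -> actual mapping lets code-generation clients name registers
abstractly (REG_0, REG_1, REG_2) while InstManip picks the machine registers. The
output -> input remapping reflects SparcV9 register windows: after a 'save', the
caller's %o<n> registers are addressed as the callee's %i<n> registers. A minimal
standalone sketch of that rotation, using the conventional SPARC integer register
numbering rather than the BinInterface macros:

  #include <cassert>
  #include <map>

  // Conventional SPARC integer register numbering: %g0-%g7 = 0-7,
  // %o0-%o7 = 8-15, %l0-%l7 = 16-23, %i0-%i7 = 24-31.
  enum { R_G0 = 0, R_O0 = 8, R_L0 = 16, R_I0 = 24 };

  int main() {
      std::map<unsigned, unsigned> outputToInput;
      for (unsigned n = 0; n < 8; ++n) {
          outputToInput[R_O0 + n] = R_I0 + n;  // %o<n> is %i<n> after 'save'
          outputToInput[R_G0 + n] = R_G0 + n;  // globals are not windowed
          outputToInput[R_L0 + n] = R_L0 + n;  // locals map to themselves
      }
      assert(outputToInput[R_O0 + 6] == R_I0 + 6);  // %o6 (%sp) -> %i6 (%fp)
      return 0;
  }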
Diffs of the changes:
Index: llvm/lib/Reoptimizer/Inst/InstManip.cpp
diff -u llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.8 llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.9
--- llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.8 Tue Apr 29 13:36:53 2003
+++ llvm/lib/Reoptimizer/Inst/InstManip.cpp Tue Apr 29 21:08:42 2003
@@ -12,6 +12,9 @@
#include "InstManip.h"
const unsigned InstManip::NOP_INST = 0x01000000;
+const unsigned InstManip::BRANCH_ALWAYS_BASE = 0x10480000;
+const unsigned InstManip::BRANCH_ALWAYS_BASE_ANNUL = 0x30480000;
+const unsigned InstManip::BIAS = 2047;
uint64_t InstManip::sm_phase3SpillRegion[InstManip::SHARED_SIZE];
using std::cout;
@@ -43,7 +46,40 @@
ostr << "}";
}
-void InstManip::printRange(unsigned* start, unsigned* end) const
+InstManip::InstManip(VirtualMem* vm):
+ m_pVM(vm),
+ m_pCurrSnippet(0)
+{
+ assert(vm && "InstManip requires valid VirtualMem instance");
+
+ // Populate logical->actual register map. Since this InstManip class is
+ // SparcV9-specific, we map to the values used by the BinInterface library and macros.
+
+ m_logicalToActualReg[REG_0] = R_O0;
+ m_logicalToActualReg[REG_1] = R_O1;
+ m_logicalToActualReg[REG_2] = R_O2;
+
+    // Populate output->input register map. This is SparcV9-specific and corresponds to
+ // the register mapping that occurs after a 'save' instruction is issued. Shared and
+ // local registers map to themselves.
+
+ m_outputToInputReg[R_O0] = R_I0;
+ m_outputToInputReg[R_O1] = R_I1;
+ m_outputToInputReg[R_O2] = R_I2;
+ m_outputToInputReg[R_O3] = R_I3;
+ m_outputToInputReg[R_O4] = R_I4;
+ m_outputToInputReg[R_O5] = R_I5;
+ m_outputToInputReg[R_O6] = R_I6;
+ m_outputToInputReg[R_O7] = R_I7;
+
+ for(unsigned i = R_G0; i <= R_G7; ++i)
+ m_outputToInputReg[i] = i;
+ for(unsigned i = R_L0; i <= R_L7; ++i)
+ m_outputToInputReg[i] = i;
+}
+
+void InstManip::printRange(unsigned* start,
+ unsigned* end) const
{
// Dumps contents (and corresponding disassembly) of memory range given by range
// to stdout. TODO: Parameterize by an ostream instance; cannot do this yet
@@ -75,57 +111,60 @@
}
void InstManip::generateLoad(uint64_t value,
- std::vector<unsigned>& snippet,
- TargetRegister reg) const
+ LogicalRegister dest,
+ LogicalRegister tmp)
{
// Load the 64-bit value into the register given by dest, using the register given
// by tmp as scratch. The generated instructions are appended to the current snippet.
+ assert(m_pCurrSnippet && "Invalid snippet for code generation");
+ assert(dest != tmp && "Distinct logical registers required");
+ std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
unsigned initSize = snippet.size();
- unsigned destReg, tmpReg;
- switch(reg) {
- case REG_0:
- destReg = R_O0;
- tmpReg = R_O1;
- break;
- case REG_1:
- destReg = R_O1;
- tmpReg = R_O2;
- break;
- default:
- assert(0 && "Invalid destination register");
- }
+ unsigned destReg = m_logicalToActualReg[dest];
+ unsigned tmpReg = m_logicalToActualReg[tmp];
- // sethi (upper 22b of upper wrd), %o0
+ // sethi (upper 22b of upper wrd), %destReg
snippet.push_back(MK_SETHI(destReg, HIGH22(HIGHWORD(value))));
- // or %o0, (lower 10b of upper wrd), %o0
+    // or %destReg, (lower 10b of upper wrd), %destReg
snippet.push_back(MK_LOGIC_IMM(OP3_OR, destReg, destReg, LOW10(HIGHWORD(value))));
- // sllx %o0, 32, %o0
+    // sllx %destReg, 32, %destReg
snippet.push_back(MK_SHIFTX(OP3_SLL, destReg, destReg, 32));
- // sethi (upper 22b of lwr wrd), %o1
+ // sethi (upper 22b of lwr wrd), %tmpReg
snippet.push_back(MK_SETHI(tmpReg, HIGH22(LOWWORD(value))));
- // or %o0, %o1, %o0
+ // or %destReg, %tmpReg, %destReg
snippet.push_back(MK_LOGIC(OP3_OR, destReg, destReg, tmpReg));
- // add %o0, (lwr 10b of lwr wrd), %o0
+ // add %destReg, (lwr 10b of lwr wrd), %destReg
snippet.push_back(MK_ADD_R_I(destReg, destReg, LOW10(LOWWORD(value))));
assert(snippet.size() - initSize == getGenLoadSize() &&
- "Unexpected number of instructions in code sequence for 64-bit value -> %destReg");
+ "Unexpected number of instructions in code sequence for 64-bit value -> %dest");
}
void InstManip::generateAddressCopy(unsigned loadInst,
- std::vector<unsigned>& snippet,
- TargetRegister reg) const
+ LogicalRegister dest,
+ bool afterSave)
{
- unsigned destReg = (reg == REG_0) ? R_O0 : R_O1;
+    // NB: After a save instruction has been issued, the output registers are mapped
+    // to the input registers.
+
+ assert(m_pCurrSnippet && "Invalid snippet for code generation");
+ std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
+ unsigned initSize = snippet.size();
+ unsigned destReg = m_logicalToActualReg[dest];
unsigned rs1 = RD_FLD(loadInst, INSTR_RS1);
-
+
+ if(afterSave)
+ rs1 = m_outputToInputReg[rs1];
+
if(RD_FLD(loadInst, INSTR_I)) {
// Case 1: load is immediate-valued --> reg, imm value add instruction needed
unsigned imm = RD_FLD(loadInst, INSTR_SIMM13);
@@ -134,14 +173,38 @@
else {
// Case 2: load is register-valued --> reg, reg add instruction needed
unsigned rs2 = RD_FLD(loadInst, INSTR_RS2);
+
+ if(afterSave)
+ rs2 = m_outputToInputReg[rs2];
+
snippet.push_back(MK_ADD_R_R(destReg, rs1, rs2));
}
+
+ assert(snippet.size() - initSize == getGenAddressCopySize(loadInst) &&
+ "Unexpected number of instructions in code sequence for address copy");
+}
+
+void InstManip::generateParamStore(LogicalRegister src,
+ StackOffset off)
+{
+ assert(m_pCurrSnippet && "Invalid snippet for code generation");
+ std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
+ unsigned initSize = snippet.size();
+ unsigned srcReg = m_logicalToActualReg[src];
+
+ snippet.push_back(MK_STX_STACK(srcReg, BIAS + off));
+
+ assert(snippet.size() - initSize == getGenParamStoreSize() &&
+ "Unexpected number of instructions in code sequence for parameter store");
}
void InstManip::generateCall(uint64_t dest,
- uint64_t slotBase,
- std::vector<unsigned>& snippet) const
+ uint64_t slotBase)
{
+ assert(m_pCurrSnippet && "Invalid snippet for code generation");
+ std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
unsigned initSize = snippet.size();
// Calculate address of call instruction from slotBase
@@ -155,92 +218,105 @@
"Unexpected number of instructions in code sequence for call");
}
-// NB: Generate restore/save currently fill the snippet (which comes from a slot) with a
-// bunch of code to save and restore the global registers. This blows up the size of the
-// required slot quite a bit -- it would be better to generate a call to functions
-// saveGlobalRegs() and restoreGlobalRegs(), for example. However, this works for now and
-// writing those functions means determining what the inline assembly should look like.
-// The ifdef'd-out region below is a start, but it is incomplete and generates errors at
-// assembly time. In particular, the SPARC assembly requires a '.register' directive before
-// it witnesses a use of %g2, %g3, %g6, or %g7, and that doesn't appear to be emitted simply
-// by using the inline assembly. :( TODO.
-//
-
-#if 0
-void restoreGlobRegs()
-{
- // asm ("assembly template" : "output contraints", "input contraints")
- // Restore the global registers %g[1-7] from the globalRegs array.
-
- asm("ldx %0, %%g1"::"o" (globalRegs));
- asm("ldx %0, %%g2"::"o" (globalRegs+1));
- asm("ldx %0, %%g3"::"o" (globalRegs+2));
- asm("ldx %0, %%g4"::"o" (globalRegs+3));
- asm("ldx %0, %%g5"::"o" (globalRegs+4));
- asm("ldx %0, %%g6"::"o" (globalRegs+5));
- asm("ldx %0, %%g7"::"o" (globalRegs+6));
+unsigned InstManip::getRestoreInst() const
+{
+ // restore %g0, 0, %g0
+ return MK_RESTORE_IMM(R_G0, R_G0, 0);
}
-#endif
-void InstManip::generateRestoreShared(uint64_t restoreFromAddr,
- std::vector<unsigned>& snippet,
- TargetRegister reg) const
+void InstManip::generateRestore()
{
- generateLoad(restoreFromAddr, snippet, reg);
+ assert(m_pCurrSnippet && "Invalid snippet for code generation");
+ std::vector<unsigned>& snippet = *m_pCurrSnippet;
- unsigned destReg = (reg == REG_0) ? R_O0 : R_O1;
-
- snippet.push_back(MK_LOAD_IMM(R_G1, destReg, 8));
- snippet.push_back(MK_LOAD_IMM(R_G2, destReg, 16));
- snippet.push_back(MK_LOAD_IMM(R_G3, destReg, 24));
- snippet.push_back(MK_LOAD_IMM(R_G4, destReg, 32));
- snippet.push_back(MK_LOAD_IMM(R_G5, destReg, 40));
- snippet.push_back(MK_LOAD_IMM(R_G6, destReg, 48));
- snippet.push_back(MK_LOAD_IMM(R_G7, destReg, 56));
+ unsigned initSize = snippet.size();
+
+ snippet.push_back(getRestoreInst());
+
+ assert(snippet.size() - initSize == getGenRestoreSize() &&
+ "Unexpected number of instructions in code sequence for restore");
}
-void InstManip::generateRestore(std::vector<unsigned>& snippet) const
+void InstManip::generateSave()
{
- // restore %o0, 0, %o0
- snippet.push_back(MK_RESTORE_IMM(R_O0, R_O0, 0));
+ assert(m_pCurrSnippet && "Invalid snippet for code generation");
+ std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
+ unsigned initSize = snippet.size();
+
+ // save %sp, -176, %sp
+ snippet.push_back(MK_SAVE_IMM(R_O6, R_O6, -176));
+
+ assert(snippet.size() - initSize == getGenSaveSize() &&
+ "Unexpected number of instructions in code sequence for save");
}
-void InstManip::generateSpillShared(uint64_t spillToAddr,
- std::vector<unsigned>& snippet,
- TargetRegister reg) const
+// TODO: It will be worthwhile to generate calls to functions that spill/restore the
+// shared registers instead of dumping all of the code into the current snippet.
+
+void InstManip::generateRestoreShared(uint64_t restoreFromAddr,
+ LogicalRegister tmp1,
+ LogicalRegister tmp2)
{
- generateLoad(spillToAddr, snippet, reg);
+ assert(m_pCurrSnippet && "Invalid snippet for code generation");
+ assert(tmp1 != tmp2 && "Distinct logical registers required");
+
+ std::vector<unsigned>& snippet = *m_pCurrSnippet;
+ unsigned initSize = snippet.size();
+ unsigned tmpReg = m_logicalToActualReg[tmp1];
- unsigned destReg = (reg == REG_0) ? R_O0 : R_O1;
+ generateLoad(restoreFromAddr, tmp1, tmp2);
+ snippet.push_back(MK_LOAD_IMM(R_G1, tmpReg, 8));
+ snippet.push_back(MK_LOAD_IMM(R_G2, tmpReg, 16));
+ snippet.push_back(MK_LOAD_IMM(R_G3, tmpReg, 24));
+ snippet.push_back(MK_LOAD_IMM(R_G4, tmpReg, 32));
+ snippet.push_back(MK_LOAD_IMM(R_G5, tmpReg, 40));
+ snippet.push_back(MK_LOAD_IMM(R_G6, tmpReg, 48));
+ snippet.push_back(MK_LOAD_IMM(R_G7, tmpReg, 56));
- snippet.push_back(MK_STORE_IMM(R_G1, destReg, 8));
- snippet.push_back(MK_STORE_IMM(R_G2, destReg, 16));
- snippet.push_back(MK_STORE_IMM(R_G3, destReg, 24));
- snippet.push_back(MK_STORE_IMM(R_G4, destReg, 32));
- snippet.push_back(MK_STORE_IMM(R_G5, destReg, 40));
- snippet.push_back(MK_STORE_IMM(R_G6, destReg, 48));
- snippet.push_back(MK_STORE_IMM(R_G7, destReg, 56));
+ assert(snippet.size() - initSize == getGenRestoreSharedSize() &&
+ "Unexpected number of instructions in code sequence for restore shared");
}
-void InstManip::generateSave(std::vector<unsigned>& snippet) const
+void InstManip::generateSpillShared(uint64_t spillToAddr,
+ LogicalRegister tmp1,
+ LogicalRegister tmp2)
{
- // save %o0, 0, %o0
- snippet.push_back(MK_SAVE_IMM(R_O0, R_O0, 0));
+ assert(m_pCurrSnippet && "Invalid snippet for code generation");
+ assert(tmp1 != tmp2 && "Distinct logical registers required");
+
+ std::vector<unsigned>& snippet = *m_pCurrSnippet;
+ unsigned initSize = snippet.size();
+ unsigned tmpReg = m_logicalToActualReg[tmp1];
+
+ generateLoad(spillToAddr, tmp1, tmp2);
+ snippet.push_back(MK_STORE_IMM(R_G1, tmpReg, 8));
+ snippet.push_back(MK_STORE_IMM(R_G2, tmpReg, 16));
+ snippet.push_back(MK_STORE_IMM(R_G3, tmpReg, 24));
+ snippet.push_back(MK_STORE_IMM(R_G4, tmpReg, 32));
+ snippet.push_back(MK_STORE_IMM(R_G5, tmpReg, 40));
+ snippet.push_back(MK_STORE_IMM(R_G6, tmpReg, 48));
+ snippet.push_back(MK_STORE_IMM(R_G7, tmpReg, 56));
+
+ assert(snippet.size() - initSize == getGenSpillSharedSize() &&
+ "Unexpected number of instructions in code sequence for spill shared");
}
void InstManip::generateBranchAlways(uint64_t dest,
uint64_t slotBase,
- std::vector<unsigned>& snippet,
- bool annul) const
+ unsigned delaySlotInstr)
{
+ assert(m_pCurrSnippet && "Invalid snippet for code generation");
+ std::vector<unsigned>& snippet = *m_pCurrSnippet;
+
unsigned initSize = snippet.size();
// Calculate address of branch instruction from slotBase
uint64_t branchInstAddr = slotBase + getInstWidth() * snippet.size();
- // Add branch instruction and nop (for branch delay slot) to code snippet.
- snippet.push_back(getBranchAlways(dest, branchInstAddr, annul));
- snippet.push_back(NOP_INST);
+ // Add branch instruction and the specified delay slot instruction to code snippet.
+ snippet.push_back(getBranchAlways(dest, branchInstAddr, false)); // annul bit low
+ snippet.push_back(delaySlotInstr);
assert(snippet.size() - initSize == getGenBranchAlwaysSize() &&
"Unexpected number of instruction in code sequence for branch-always");
Index: llvm/lib/Reoptimizer/Inst/InstManip.h
diff -u llvm/lib/Reoptimizer/Inst/InstManip.h:1.9 llvm/lib/Reoptimizer/Inst/InstManip.h:1.10
--- llvm/lib/Reoptimizer/Inst/InstManip.h:1.9 Tue Apr 29 13:36:53 2003
+++ llvm/lib/Reoptimizer/Inst/InstManip.h Tue Apr 29 21:08:42 2003
@@ -77,15 +77,24 @@
class InstManip
{
public:
- InstManip(VirtualMem* vm): m_pVM(vm)
- {
- assert(vm && "InstManip requires valid VirtualMem instance");
- }
+ InstManip(VirtualMem* vm);
typedef std::pair<uint64_t, unsigned> Inst; // (location, inst word) pair
- enum TargetRegister { REG_0, REG_1 };
-
+ // Logical registers used by clients of this class, mapped to machine-specific IDs
+ // by the logical -> actual register map.
+ enum LogicalRegister {
+ REG_0,
+ REG_1,
+ REG_2
+ };
+
+ // Offsets in stack frame for function parameters
+ enum StackOffset {
+ PARAM_0 = 128,
+ PARAM_1 = 136
+ };
+
void printRange(unsigned* start, unsigned* end) const;
inline void printRange(uint64_t start, uint64_t end) const;
@@ -94,38 +103,41 @@
uint64_t skipFunctionHdr(uint64_t addr) const;
+ void startCode(std::vector<unsigned>& snippet) { m_pCurrSnippet = &snippet; }
+ void endCode() { m_pCurrSnippet = 0; }
+
void generateAddressCopy(unsigned loadInst,
- std::vector<unsigned>& snippet,
- TargetRegister reg = REG_0) const;
+ LogicalRegister dest,
+ bool afterSave);
+
+ void generateBranchAlways(uint64_t dest,
+ uint64_t slotBase,
+ unsigned delaySlotInstr = NOP_INST);
+
+ void generateCall(uint64_t dest, uint64_t slotBase);
void generateLoad(uint64_t value,
- std::vector<unsigned>& snippet,
- TargetRegister reg = REG_0) const;
+ LogicalRegister dest,
+ LogicalRegister tmp);
- void generateCall(uint64_t dest,
- uint64_t slotBase,
- std::vector<unsigned>& snippet) const;
+ void generateParamStore(LogicalRegister src, StackOffset off);
- void generateRestore(std::vector<unsigned>& snippet) const;
- void generateSave(std::vector<unsigned>& snippet) const;
+ void generateRestore();
+ void generateSave();
- void generateSpillShared(uint64_t spillFromAddr,
- std::vector<unsigned>& snippet,
- TargetRegister reg = REG_0) const;
-
- void generateRestoreShared(uint64_t restorFromAddr,
- std::vector<unsigned>& snippet,
- TargetRegister reg = REG_0) const;
+ void generateRestoreShared(uint64_t restoreFromAddr,
+ LogicalRegister tmp1 = REG_0,
+ LogicalRegister tmp2 = REG_1);
- void generateBranchAlways(uint64_t dest,
- uint64_t slotBase,
- std::vector<unsigned>& snippet,
- bool annul = true) const;
+ void generateSpillShared(uint64_t spillFromAddr,
+ LogicalRegister tmp1 = REG_0,
+ LogicalRegister tmp2 = REG_1);
void findCandidates(uint64_t start,
uint64_t end,
std::vector<InstCandidate>& candidates);
+ unsigned getRestoreInst() const;
inline unsigned getBranchAlways(uint64_t dest, uint64_t pc, bool annulHigh = true) const;
inline unsigned getCallInst(uint64_t dest, uint64_t pc) const;
inline bool isBranch(unsigned inst) const;
@@ -138,18 +150,21 @@
unsigned getGenCallSize() const { return 2; }
unsigned getGenBranchAlwaysSize() const { return 2; }
unsigned getGenSaveSize() const { return 1; }
+ unsigned getGenParamStoreSize() const { return 1; }
unsigned getGenSpillSharedSize() const { return getGenLoadSize() + SHARED_SIZE; }
unsigned getGenRestoreSharedSize() const { return getGenLoadSize() + SHARED_SIZE; }
unsigned getGenRestoreSize() const { return 1; }
unsigned getInstWidth() const { return 4; }
unsigned getSharedSize() const { return SHARED_SIZE; }
- inline unsigned getAddressCopySize(unsigned loadInst) const;
+ inline unsigned getGenAddressCopySize(unsigned loadInst) const;
uint64_t getPhase3SpillAddr() { return (uint64_t) sm_phase3SpillRegion; }
private:
InstManip() {}
+ typedef std::map<LogicalRegister, unsigned> LogicalToActualRegMap;
+ typedef std::map<unsigned, unsigned> OutputToInputRegMap;
bool isCandidateLoad(uint64_t addr,
uint64_t end,
@@ -168,22 +183,33 @@
uint64_t end,
unsigned fpOffset);
- // Branch-always (annul bit high) instruction base (i.e. address not filled in yet)
- static const unsigned BRANCH_ALWAYS_BASE = 0x30480000;
+ VirtualMem* m_pVM;
+ std::vector<unsigned>* m_pCurrSnippet;
+ LogicalToActualRegMap m_logicalToActualReg; // Maps logical -> actual register
+    OutputToInputRegMap     m_outputToInputReg;  // Maps output register -> input register
+
+ // Branch-always (annul bit high) instruction base (i.e., address not filled in yet)
+ static const unsigned BRANCH_ALWAYS_BASE_ANNUL;
+
+ // Branch-always (annul bit low) instruction base (i.e., address not filled in yet)
+ static const unsigned BRANCH_ALWAYS_BASE;
+
+ // NOP instruction
static const unsigned NOP_INST;
// Size (in number of 64-bit words) required for storing shared registers
static const unsigned SHARED_SIZE = 7;
- VirtualMem* m_pVM;
-
+    // SparcV9 stack bias constant used in %sp-relative addressing
+ static const unsigned BIAS;
+
// Memory region into which to spill shared registers when executing a phase 4 slot
// (i.e., the slot that invokes the phase4 function, the slot written by phase 3
// invocations). NB: One region is sufficient and we do not need stack semantics
// because only one activation of a phase 4 slot ever occurs at a given time (assuming
// single-threaded execution).
- static uint64_t sm_phase3SpillRegion[SHARED_SIZE];
+ static uint64_t sm_phase3SpillRegion[SHARED_SIZE];
};
void InstManip::printRange(uint64_t start, uint64_t end) const
@@ -209,8 +235,9 @@
// branch instruction is executed (i.e., the address of the branch instruction).
- assert(annul && "Unhandled case: annul bit low");
- return getUndepJumpInstr(BRANCH_ALWAYS_BASE, dest, pc);
+ return getUndepJumpInstr(annul ? BRANCH_ALWAYS_BASE_ANNUL : BRANCH_ALWAYS_BASE,
+ dest,
+ pc);
}
unsigned InstManip::getCallInst(uint64_t dest, uint64_t pc) const
@@ -227,7 +254,7 @@
return ::isBranchInstr(inst);
}
-unsigned InstManip::getAddressCopySize(unsigned loadInst) const
+unsigned InstManip::getGenAddressCopySize(unsigned loadInst) const
{
// Determine the number of instructions required to load the address value used by the
// load instruction into some register.
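
A note on the new constants in this header: on SparcV9 the stack pointer is biased,
so the actual frame lives at %sp + 2047 (hence BIAS), and the minimal frame reserved
by 'save %sp, -176, %sp' is the 128-byte register-window save area plus six 8-byte
outgoing-parameter slots. PARAM_0 and PARAM_1 are the first two parameter slots,
immediately past the window save area. A small check of that arithmetic, with the
constants restated here purely for illustration:

  #include <cassert>

  const unsigned kBias       = 2047;    // V9 stack bias: real frame = %sp + 2047
  const unsigned kWindowArea = 16 * 8;  // save area for %l0-%l7 and %i0-%i7
  const unsigned kParamSlots = 6 * 8;   // six 8-byte outgoing parameter slots

  int main() {
      assert(kWindowArea + kParamSlots == 176);  // matches save %sp, -176, %sp
      unsigned param0 = kWindowArea;             // stx lands at %sp + kBias + 128
      unsigned param1 = kWindowArea + 8;         // stx lands at %sp + kBias + 136
      assert(param0 == 128 && param1 == 136);    // matches PARAM_0 / PARAM_1
      (void)kBias;
      return 0;
  }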
Index: llvm/lib/Reoptimizer/Inst/Phases.cpp
diff -u llvm/lib/Reoptimizer/Inst/Phases.cpp:1.14 llvm/lib/Reoptimizer/Inst/Phases.cpp:1.15
--- llvm/lib/Reoptimizer/Inst/Phases.cpp:1.14 Tue Apr 29 14:48:29 2003
+++ llvm/lib/Reoptimizer/Inst/Phases.cpp Tue Apr 29 21:08:42 2003
@@ -16,18 +16,18 @@
// slot (annulling bit should specify *not* to execute the branch delay slot) in
// the dummy function.
//
-// 2b. In the new slot, write the contents of the phase 2 slot:
-// +------------------------------+
-// | load parameter for phase 3 |
-// | call to phase 3 |
-// | nop |
-// | branch back to orig code |
-// | nop |
-// +------------------------------+
+// 2b. In the new slot, write the contents of the phase 3 slot:
+// +------------------------------------+
+// | save registers (new stack frame) |
+// | load parameter for phase 3 |
+// | call to phase 3 |
+// | nop |
+// | branch back to orig code |
+// | restore registers |
+// +------------------------------------+
// where the parameter to phase 3 is a pointer to the heap-allocated Phase3Info
// instance.
//
-//
// PHASE 3:
//
// - Deallocate the parameter structure whenever it is convenient to do so.
@@ -40,18 +40,17 @@
// 3. For each load-volatile candidate,
// 3a. Obtain a new slot in the dummy function.
// 3b. Replace the load candidate with branch to slot.
-// 3c. In the new slot, write the contents of the phase 3 slot:
+// 3c. In the new slot, write the contents of the phase 4 slot:
// +---------------------------------------+
-// | save registers |
+// | save registers (new stack frame) |
// | save global registers |
// | copy load-src addr to param1 register |
// | load p4 struct ptr to param2 register |
// | call to phase 4 |
// | nop |
-// | restore registers |
// | restore global registers |
// | branch back to orig code |
-// | nop |
+// | restore registers |
// +---------------------------------------+
//
// 4. Deallocate the slot that originated this invocation.
@@ -62,21 +61,11 @@
// 1a. If tag is in GBT, we have a valid candidate, so do step 2.
// 1b. If tag is not in GBT, our candidate is invalid, so delete slot and return to
// original code.
-//
-// 2. Set up the second phase 4 slot that will actually call the instrumentation function:
+//
+// 2. Set up the phase 5 slot that will actually call the instrumentation function:
// +---------------------------------------+
-// | save registers |
-// | save global registers |
-// | call to inst func |
-// | nop |
-// | restore registers |
-// | restore global registers |
-// | branch back to orig code |
-// | nop |
+// | ... |
// +---------------------------------------+
-// This "instrumentation slot" may have to be expanded later to store the return value
-// in an alloca'd temporary, unless the phase4 function itself can invoke the
-// instrumentation function, would be *highly* ideal.
//
#include <stdlib.h>
@@ -285,8 +274,9 @@
std::string funcName;
AddressRange range;
- // TODO: Come up with a better way to do this that doesn't involve storing the entire
- // list of functions here -- this could be quite large.
+ // Obtain the list of functions to transform, from the ElfReader module. TODO: Come
+ // up with a better way to do this that doesn't involve storing the entire list of
+ // functions here -- this could be quite large.
vector<std::pair<std::string, AddressRange> > funcs;
while(elfReader.GetNextFunction(funcName, range))
@@ -294,6 +284,13 @@
cerr << "There are " << funcs.size() << " functions to process." << endl << endl;
+ // Heap-allocate a region of memory in which to spill shared registers before phase3
+ // invocations. We allocate one unit of space (given by InstManip::getSharedSize())
+ // for each function that we transform.
+
+ sm_pSpillRegion = new uint64_t[m_instManip.getSharedSize() * funcs.size()];
+ sm_pCurrSpill = sm_pSpillRegion;
+
for(vector<std::pair<std::string, AddressRange> >::iterator i = funcs.begin(),
e = funcs.end(); i != e; ++i) {
if(i->first == "fibs") {
@@ -334,17 +331,34 @@
"Unhandled case: branch instruction first in function body");
vm->writeInstToVM(repInstAddr, m_instManip.getBranchAlways(slotBase, repInstAddr));
- // Generate a) code to load the address of the heap-allocated Phase3Info struct into a
- // register, which will be used as a parameter to the phase3 call, b) the call to
- // phase 3 itself, and c) the direct branch back to the original code.
+ // Generate the phase 3 slot. See picture of phase 3 slot contents for more info.
Phase3Info* p3info = new Phase3Info(range, origInst, repInstAddr,
slotBase, getSlotSize(), m_pTraceCache);
vector<unsigned> snippet;
- m_instManip.generateLoad((uint64_t) p3info, snippet);
- m_instManip.generateCall((uint64_t) &phase3, slotBase, snippet);
- m_instManip.generateBranchAlways(repInstAddr, slotBase, snippet);
+ m_instManip.startCode(snippet);
+
+ m_instManip.generateSave();
+ m_instManip.generateSpillShared((uint64_t) sm_pCurrSpill);
+ m_instManip.generateLoad((uint64_t) p3info, InstManip::REG_0, InstManip::REG_1);
+ m_instManip.generateCall((uint64_t) &phase3, slotBase);
+ m_instManip.generateRestoreShared((uint64_t) sm_pCurrSpill);
+ m_instManip.generateBranchAlways(repInstAddr, slotBase, m_instManip.getRestoreInst());
+
+ m_instManip.endCode();
+
+ // Dump snippet instructions:
+ cerr << "phase3 slot instructions:" << endl;
+ for(vector<unsigned>::iterator j = snippet.begin(), k = snippet.end(); j != k; ++j) {
+ m_instManip.printInst(*j);
+ cerr << endl;
+ }
+
+ // Bump the current spill pointer to the next "spill slot" in the spill region used
+ // before/after phase3() invocations.
+
+ sm_pCurrSpill += m_instManip.getSharedSize();
// Copy the snippet code into the slot
assert(snippet.size() == getSlotSize() && "Snippet size does not match slot size");
@@ -356,8 +370,11 @@
// The following sum corresponds to the sizes consumed by the various regions of the
// phase 2 slot. See picture of phase 2 contents for details.
- return m_instManip.getGenLoadSize() +
+ return m_instManip.getGenSaveSize() +
+ m_instManip.getGenSpillSharedSize() +
+ m_instManip.getGenLoadSize() +
m_instManip.getGenCallSize() +
+ m_instManip.getGenRestoreSharedSize() +
m_instManip.getGenBranchAlwaysSize();
}
@@ -430,25 +447,32 @@
uint64_t slotBase = replaceInstWithBrToSlot(i->front().first, getSlotSize(*i),
m_pTraceCache, m_instManip);
- // Generate a) code to save the registers, b) instruction(s) to store the load
- // source address into a phase4 parameter register, c) the load of (the
- // pointer-to) the heap-allocated Phase4Info structure into a phase4 parameter
- // register, and d) code to call phase 3, restore regs, and branch back to
- // original code.
+ // Generate the phase 4 slot. See picture of phase 4 slot contents for more info.
Phase4Info* p4info = new Phase4Info(*i, slotBase, getSlotSize(*i), m_pTraceCache);
uint64_t spillAddr = m_instManip.getPhase3SpillAddr();
-
+
vector<unsigned> snippet;
- m_instManip.generateSave(snippet);
- m_instManip.generateAddressCopy(i->front().second, snippet); // Uses InstManip::REG_0, live to call
- m_instManip.generateSpillShared(spillAddr, snippet, InstManip::REG_1);
- m_instManip.generateLoad((uint64_t) p4info, snippet, InstManip::REG_1);
- m_instManip.generateCall((uint64_t) &phase4, slotBase, snippet);
- m_instManip.generateRestoreShared(spillAddr, snippet);
- m_instManip.generateRestore(snippet);
- m_instManip.generateBranchAlways(i->front().first, slotBase, snippet);
+ m_instManip.startCode(snippet);
+
+ // NB: We pass parameters to the phase4 function in REG_0 and REG_1 on the
+ // assumption that the input parameters will be looked for there. However, it is
+ // possible that the input parameters will be taken from the parameter array at
+ // fixed offsets from the stack pointer. Hence, we store the parameters there as
+ // well.
+
+ m_instManip.generateSave();
+ m_instManip.generateAddressCopy(i->front().second, InstManip::REG_0, true); // REG_0 live to call
+ m_instManip.generateParamStore(InstManip::REG_0, InstManip::PARAM_0);
+ m_instManip.generateSpillShared(spillAddr, InstManip::REG_1, InstManip::REG_2);
+ m_instManip.generateLoad((uint64_t) p4info, InstManip::REG_1, InstManip::REG_2); // REG_1 live to call
+ m_instManip.generateParamStore(InstManip::REG_1, InstManip::PARAM_1);
+ m_instManip.generateCall((uint64_t) &phase4, slotBase);
+ m_instManip.generateRestoreShared(spillAddr);
+ m_instManip.generateBranchAlways(i->front().first, slotBase, m_instManip.getRestoreInst());
+
+ m_instManip.endCode();
// Dump snippet instructions:
@@ -474,12 +498,13 @@
// phase 3 slot. See picture of phase 3 contents for details.
return m_instManip.getGenSaveSize() +
- m_instManip.getAddressCopySize(cand.front().second) +
+ m_instManip.getGenAddressCopySize(cand.front().second) +
+ m_instManip.getGenParamStoreSize() +
m_instManip.getGenSpillSharedSize() +
m_instManip.getGenLoadSize() +
+ m_instManip.getGenParamStoreSize() +
m_instManip.getGenCallSize() +
m_instManip.getGenRestoreSharedSize() +
- m_instManip.getGenRestoreSize() +
m_instManip.getGenBranchAlwaysSize();
}
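
The spill-region bookkeeping added to phase 2 above is a simple bump allocator: one
slot of getSharedSize() 64-bit words per transformed function, advanced after each
phase 3 slot is generated. Sketched standalone with hypothetical names:

  #include <cstdint>

  // One spill slot (unit 64-bit words) per transformed function.
  struct SpillRegion {
      uint64_t* base;
      uint64_t* curr;
      unsigned  unit;  // words per slot, i.e. InstManip::getSharedSize()

      SpillRegion(unsigned unitWords, unsigned numFuncs)
          : base(new uint64_t[unitWords * numFuncs]), curr(base), unit(unitWords) {}

      uint64_t* take() { uint64_t* p = curr; curr += unit; return p; }  // bump
  };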
Index: llvm/lib/Reoptimizer/Inst/design.txt
diff -u llvm/lib/Reoptimizer/Inst/design.txt:1.10 llvm/lib/Reoptimizer/Inst/design.txt:1.11
--- llvm/lib/Reoptimizer/Inst/design.txt:1.10 Tue Apr 29 13:36:53 2003
+++ llvm/lib/Reoptimizer/Inst/design.txt Tue Apr 29 21:08:42 2003
@@ -896,6 +896,10 @@
- Write phase 5 slot generation code, phase 5 function itself, etc.
+ - Optimizations:
+ - No need to save registers (other than those clobbered) in phase 3 slot, since phase 3
+ is invoked at the start of the function. Must still spill/restore shared, though.
+
}}}
{{{ PHASE OUTLINE