[llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/InstManip.cpp InstManip.h Phases.cpp
Joel Stanley
jstanley at cs.uiuc.edu
Tue Apr 15 16:24:01 PDT 2003
Changes in directory llvm/lib/Reoptimizer/Inst:
InstManip.cpp updated: 1.6 -> 1.7
InstManip.h updated: 1.6 -> 1.7
Phases.cpp updated: 1.10 -> 1.11
---
Log message:
Load candidate heuristic is implemented, phase 4 being invoked with
register contents being passed as a parameter.
---
Diffs of the changes:
Index: llvm/lib/Reoptimizer/Inst/InstManip.cpp
diff -u llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.6 llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.7
--- llvm/lib/Reoptimizer/Inst/InstManip.cpp:1.6 Fri Apr 11 18:57:07 2003
+++ llvm/lib/Reoptimizer/Inst/InstManip.cpp Tue Apr 15 16:26:19 2003
@@ -17,6 +17,31 @@
using std::cerr;
using std::endl;
+std::ostream& operator<<(std::ostream& ostr,
+ const InstCandidate& cand)
+{
+ cand.print(ostr);
+ return ostr;
+}
+
+void InstCandidate::print(std::ostream& ostr) const
+{
+ ostr << "InstCandidate {" << endl;
+ ostr << " type = "
+ << (m_type == DIRECT ? "DIRECT" : "STACK XFER")
+ << endl;
+ ostr << " Instruction dump (address, inst):" << endl;
+
+ for(std::vector<std::pair<uint64_t, unsigned> >::const_iterator i =
+ m_insts.begin(), e = m_insts.end(); i != e; ++i) {
+ ostr << std::hex << " (" << i->first << ", " << std::flush;
+ sparc_print(i->second);
+ fflush(stdout);
+ ostr << ")" << endl;
+ }
+ ostr << "}";
+}
+
void InstManip::printRange(unsigned* start, unsigned* end) const
{
// Dumps contents (and corresponding disassembly) of memory range given by range
@@ -93,9 +118,28 @@
"Unexpected number of instructions in code sequence for 64-bit value -> %destReg");
}
+void InstManip::generateAddressCopy(unsigned loadInst,
+ std::vector<unsigned>& snippet,
+ TargetRegister reg) const
+{
+ unsigned destReg = (reg == REG_0) ? R_O0 : R_O1;
+ unsigned rs1 = RD_FLD(loadInst, INSTR_RS1);
+
+ if(RD_FLD(loadInst, INSTR_I)) {
+ // Case 1: load is immediate-valued --> reg, imm value add instruction needed
+ unsigned imm = RD_FLD(loadInst, INSTR_SIMM13);
+ snippet.push_back(MK_ADD_R_I(destReg, rs1, imm));
+ }
+ else {
+ // Case 2: load is register-valued --> reg, reg add instruction needed
+ unsigned rs2 = RD_FLD(loadInst, INSTR_RS2);
+ snippet.push_back(MK_ADD_R_R(destReg, rs1, rs2));
+ }
+}
+
void InstManip::generateCall(uint64_t dest,
uint64_t slotBase,
- std::vector<unsigned>& snippet)
+ std::vector<unsigned>& snippet) const
{
unsigned initSize = snippet.size();
@@ -110,10 +154,22 @@
"Unexpected number of instructions in code sequence for call");
}
+void InstManip::generateRestore(std::vector<unsigned>& snippet) const
+{
+ // restore %o0, 0, %o0
+ snippet.push_back(MK_RESTORE(R_O0, R_O0, 0));
+}
+
+void InstManip::generateSave(std::vector<unsigned>& snippet) const
+{
+ // save %o0, 0, %o0
+ snippet.push_back(MK_SAVE(R_O0, R_O0, 0));
+}
+
void InstManip::generateBranchAlways(uint64_t dest,
uint64_t slotBase,
std::vector<unsigned>& snippet,
- bool annul)
+ bool annul) const
{
unsigned initSize = snippet.size();
@@ -133,25 +189,31 @@
std::vector<InstCandidate>& candidates)
{
for(uint64_t currAddr = start; currAddr <= end; currAddr += getInstWidth()) {
- unsigned inst = m_pVM->readInstrFrmVm(currAddr);
-
- cout << "findCandidates processing instruction:\t";
- printInst(m_pVM->readInstrFrmVm(currAddr));
- cout << endl;
-
InstCandidate cand;
if(isCandidateLoad(currAddr, end, cand))
- cerr << "It's a candidate load!" << endl;
+ candidates.push_back(cand);
}
}
static inline bool isLoadHalfWord(unsigned inst)
{
- // Returns true if inst is an lduh instruction
+ // Returns true if inst is an LDUH instruction
return RD_FLD(inst, INSTR_OP) == OP_3 &&
RD_FLD(inst, INSTR_OP3) == OP3_LDUH;
}
+static inline bool isLoadByte(unsigned inst)
+{
+ // Returns true if inst is a LDUB instruction
+ return RD_FLD(inst, INSTR_OP) == OP_3 &&
+ RD_FLD(inst, INSTR_OP3) == OP3_LDUB;
+}
+
+static inline bool isFPRelative(unsigned inst)
+{
+ return RD_FLD(inst, INSTR_RS1) == R_BP && RD_FLD(inst, INSTR_I) == 1;
+}
+
static inline bool isSTH(unsigned inst)
{
return RD_FLD(inst, INSTR_OP) == OP_3 &&
@@ -168,7 +230,7 @@
{
// Assumes that inst is a load instruction, and returns the register ID of its
// destination operand.
-
+
return RD_FLD(inst, INSTR_RD);
}
@@ -176,16 +238,80 @@
{
// Assumes that inst is a stb/sth instruction, and returns the register ID of its
// source operand (by source, we don't mean rs1 or rs2, but rather rd, which specifies
- // the register which contains the value being stored)
+ // the register which contains the value being stored);
return RD_FLD(inst, INSTR_RD);
}
+static inline unsigned getFPOffset(unsigned inst)
+{
+ assert(isFPRelative(inst) && "Expect instruction to be FP-relative");
+ return RD_FLD(inst, INSTR_SIMM13);
+}
+
+bool InstManip::determineSchema(InstCandidate& cand,
+ uint64_t end,
+ std::pair<uint64_t, unsigned>& load,
+ std::pair<uint64_t, unsigned>& store)
+{
+ // inst1 contains the load instruction (the actual candidate). inst2 contains the
+ // corresponding store instruction, which is either STB or STH. If STB, take actions
+ // for schema 1, and if STH, schema 2.
+
+ if(isSTB(store.second)) {
+ // Schema 1: "direct" pattern
+ cand.setType(InstCandidate::DIRECT);
+ cand.push_back(load);
+ cand.push_back(store);
+ return true;
+ }
+ else {
+ assert(isSTH(store.second) && "Instruction must be STH");
+
+ // We have potentially discovered an instance of schema 2, but must search
+ // more to determine if this is the case.
+ //
+ // KIS heuristic concession: The STH given by storeInst *must* be storing to the stack
+ // in an fp-relative manner; if not, we deny the originating load's candidacy.
+
+ if(isFPRelative(store.second)) {
+ // Search forward until a LDUB from same stack location (+1) as the STH wrote to
+ // is encountered. The +1 specified in the FP offset we're searching for is
+ // due to the fact that we stored a half-word but are loading a byte.
+
+ if(uint64_t stkLoadAddr = findNextStackLoad(store.first, end, getFPOffset(store.second) + 1)) {
+ // Last schema-2 search: find the STB instruction that stores from the
+ // LDUB's destination register.
+
+ unsigned ldubInst = m_pVM->readInstrFrmVm(stkLoadAddr);
+ uint64_t stbAddr = findNextStore(stkLoadAddr, end, getLoadDest(ldubInst));
+ unsigned stbInst;
+
+ if(stbAddr && isSTB((stbInst = m_pVM->readInstrFrmVm(stbAddr)))) {
+
+ // All of the criteria have been met for Schema 2, the "stack transfer"
+ // pattern.
+
+ cand.setType(InstCandidate::STACK_XFER);
+ cand.push_back(load);
+ cand.push_back(store);
+ cand.push_back(stkLoadAddr, ldubInst);
+ cand.push_back(stbAddr, stbInst);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
bool InstManip::isCandidateLoad(uint64_t addr,
uint64_t end,
InstCandidate& cand)
{
// {{{ Description of heuristic
+
// A candidate load is the first instruction in a sequence (with an arbitrary number
// of instructions in between elements of this sequence) that is a "signature" for the
// particular load of a volatile variable which needs to be replaced with a call to an
@@ -214,6 +340,7 @@
// The current heuristic catches both of these patterns (designated "direct" and "stack
// transfer" respectively), and will be extended as insufficiencies in the heuristic
// are revealed.
+
// }}}
// Address of potential candidate load is given by 'addr', maximum search address is
@@ -223,34 +350,44 @@
if(isLoadHalfWord(inst)) {
// Search forward until a sth/stb from inst's target register is encountered
- uint64_t storeAddr = findNextStore(addr, end, getLoadDest(inst));
- if(!storeAddr)
- return false; // No store? Can't be a candidate load.
-
- // If STB, ... If STH, ...
+ if(uint64_t storeAddr = findNextStore(addr, end, getLoadDest(inst))) {
- unsigned storeInst = m_pVM->readInstrFrmVm(storeAddr);
- if(isSTH(storeInst)) {
- cerr << "Discovered sth: " << endl;
- }
- else {
- // STB instruction
- cerr << "Discovered stb: " << endl;
+ // If STB, take actions for schema 1, otherwise check for schema 2 conditions.
+
+ unsigned storeInst = m_pVM->readInstrFrmVm(storeAddr);
+ std::pair<uint64_t, unsigned> inst1(addr, inst);
+ std::pair<uint64_t, unsigned> inst2(storeAddr, storeInst);
+
+ return determineSchema(cand, end, inst1, inst2);
}
-
- printInst(storeInst);
-
- return true;
}
return false;
}
+uint64_t InstManip::findNextStackLoad(uint64_t addr,
+ uint64_t end,
+ unsigned fpOffset)
+{
+ // Sweep the range of addresses starting at addr, up to end, looking for a load byte
+ // that is loading from [%fp + fpOffset]. Return the first such instance, or 0 is such
+ // an instance cannot be found.
+
+ for(uint64_t currAddr = addr; currAddr <= end; currAddr += getInstWidth()) {
+ unsigned inst = m_pVM->readInstrFrmVm(currAddr);
+
+ if(isLoadByte(inst) && isFPRelative(inst) && getFPOffset(inst) == fpOffset)
+ return currAddr;
+ }
+
+ return 0;
+}
+
uint64_t InstManip::findNextStore(uint64_t addr,
uint64_t end,
unsigned srcReg)
{
- // Sweep the range of addresses starting at addr (up to end) looking for stb or sth
+ // Sweep the range of addresses starting at addr, up to end, looking for stb or sth
// instructions that are storing _from_ 'fromReg'. Return the first such instance, or
// 0 if such an instance cannot be found.
Index: llvm/lib/Reoptimizer/Inst/InstManip.h
diff -u llvm/lib/Reoptimizer/Inst/InstManip.h:1.6 llvm/lib/Reoptimizer/Inst/InstManip.h:1.7
--- llvm/lib/Reoptimizer/Inst/InstManip.h:1.6 Fri Apr 11 18:57:07 2003
+++ llvm/lib/Reoptimizer/Inst/InstManip.h Tue Apr 15 16:26:19 2003
@@ -38,7 +38,7 @@
bool isDirect() const { return m_type == DIRECT; }
bool isStackXfer() const { return m_type == STACK_XFER; }
- const std::vector<std::pair<uint64_t, unsigned> >& getInsts() const
+ std::vector<std::pair<uint64_t, unsigned> >& getInsts()
{
return m_insts;
}
@@ -48,6 +48,18 @@
m_insts.push_back(std::make_pair(addr, inst));
}
+ void push_back(std::pair<uint64_t, unsigned>& inst)
+ {
+ m_insts.push_back(inst);
+ }
+
+ const std::pair<uint64_t, unsigned>& front() const
+ {
+ return m_insts.front();
+ }
+
+ void print(std::ostream& ostr) const;
+
protected:
CandType m_type;
@@ -55,6 +67,8 @@
std::vector<std::pair<uint64_t, unsigned> > m_insts;
};
+std::ostream& operator<<(std::ostream& ostr, const InstCandidate& cand);
+
class InstManip
{
public:
@@ -75,18 +89,25 @@
uint64_t skipFunctionHdr(uint64_t addr) const;
+ void generateAddressCopy(unsigned loadInst,
+ std::vector<unsigned>& snippet,
+ TargetRegister reg = REG_0) const;
+
void generateLoad(uint64_t value,
std::vector<unsigned>& snippet,
TargetRegister reg = REG_0) const;
void generateCall(uint64_t dest,
uint64_t slotBase,
- std::vector<unsigned>& snippet);
+ std::vector<unsigned>& snippet) const;
+ void generateRestore(std::vector<unsigned>& snippet) const;
+ void generateSave(std::vector<unsigned>& snippet) const;
+
void generateBranchAlways(uint64_t dest,
uint64_t slotBase,
std::vector<unsigned>& snippet,
- bool annul = true);
+ bool annul = true) const;
void findCandidates(uint64_t start,
uint64_t end,
@@ -99,11 +120,15 @@
// These are functions so when InstManip is superclassed, they'd become virtual, etc.
// In the short term we could use class constants, but this is more clear.
- unsigned getNOP() const { return 0x01000000; }
- unsigned getGenLoadSize() const { return 6; }
- unsigned getGenCallSize() const { return 2; }
- unsigned getGenBranchAlwaysSize() const { return 2; }
- unsigned getInstWidth() const { return 4; }
+ unsigned getNOP() const { return NOP_INST; }
+ unsigned getGenLoadSize() const { return 6; }
+ unsigned getGenCallSize() const { return 2; }
+ unsigned getGenBranchAlwaysSize() const { return 2; }
+ unsigned getGenSaveSize() const { return 1; }
+ unsigned getGenRestoreSize() const { return 1; }
+ unsigned getInstWidth() const { return 4; }
+
+ inline unsigned getAddressCopySize(unsigned loadInst) const;
private:
InstManip() {}
@@ -112,9 +137,18 @@
uint64_t end,
InstCandidate& cand);
+ bool determineSchema(InstCandidate& cand,
+ uint64_t end,
+ std::pair<uint64_t, unsigned>& load,
+ std::pair<uint64_t, unsigned>& store);
+
uint64_t findNextStore(uint64_t addr,
uint64_t end,
unsigned srcReg);
+
+ uint64_t findNextStackLoad(uint64_t addr,
+ uint64_t end,
+ unsigned fpOffset);
// Branch-always (annul bit high) instruction base (i.e. address not filled in yet)
static const unsigned BRANCH_ALWAYS_BASE = 0x30480000;
@@ -163,5 +197,17 @@
{
return ::isBranchInstr(inst);
}
+
+unsigned InstManip::getAddressCopySize(unsigned loadInst) const
+{
+ // Determine the number of instructions required to load the address value used by the
+ // load instruction into some register.
+
+ // Case 1: load is immediate-valued --> add-immediate instruction needed, size is 1 inst
+ // Case 2: load is register-valued --> add-registers instruction needed, size is 1 inst
+
+ return 1;
+}
+
#endif // _INCLUDED_INSTMANIP_H
Index: llvm/lib/Reoptimizer/Inst/Phases.cpp
diff -u llvm/lib/Reoptimizer/Inst/Phases.cpp:1.10 llvm/lib/Reoptimizer/Inst/Phases.cpp:1.11
--- llvm/lib/Reoptimizer/Inst/Phases.cpp:1.10 Fri Apr 11 18:57:07 2003
+++ llvm/lib/Reoptimizer/Inst/Phases.cpp Tue Apr 15 16:26:19 2003
@@ -12,15 +12,21 @@
//
// 2. For each function F (only in text segment preferably), write code to call phase 3.
//
-// 2a. Replace the first instruction in F with a branch to a new slot in the
-// dummy function.
+// 2a. Replace the first (replacable) instruction in F with a branch to a new
+// slot (annulling bit should specify *not* to execute the branch delay slot) in
+// the dummy function.
+//
+// 2b. In the new slot, write the contents of the phase 2 slot:
+// +------------------------------+
+// | load parameter for phase 3 |
+// | call to phase 3 |
+// | nop |
+// | branch back to orig code |
+// | nop |
+// +------------------------------+
+// where the parameter to phase 3 is a pointer to the heap-allocated Phase3Info
+// instance.
//
-// 2b. At the new slot write the call to phase 3, passing it a pointer to an
-// info structure which contains the original (replaced) instruction, the
-// address range of the function, etc.
-//
-// 2c. At the end of the new slot write the direct branch back to the original
-// code.
//
// PHASE 3:
//
@@ -32,8 +38,19 @@
// 2. Analyze the function and determine the load-volatile candidates.
//
// 3. For each load-volatile candidate,
-//
-// 3a.
+// 3a. Obtain a new slot in the dummy function.
+// 3b. Replace the load candidate with branch to slot.
+// 3c. In the new slot, write the contents of the phase 3 slot:
+// +---------------------------------------+
+// | save registers |
+// | copy load-src addr to param1 register |
+// | load p4 struct ptr to param2 register |
+// | call to phase 4 |
+// | nop |
+// | restore registers |
+// | branch back to orig code |
+// | nop |
+// +---------------------------------------+
//
// 4. Deallocate the slot that originated this invocation.
//
@@ -66,11 +83,13 @@
unsigned origInst,
uint64_t replaceAddr,
uint64_t slotDescriptor,
+ unsigned slotSize,
TraceCache* pTraceCache):
m_addrRange(addressRange),
m_origInst(origInst),
m_replaceAddr(replaceAddr),
m_slotDescriptor(slotDescriptor),
+ m_slotSize(slotSize),
m_pTraceCache(pTraceCache)
{
}
@@ -89,7 +108,8 @@
uint64_t getEndAddr() const { return m_addrRange.second; }
uint64_t getOrigInst() const { return m_origInst; }
uint64_t getReplaceAddr() const { return m_replaceAddr; }
- uint64_t getSlot() const { return m_slotDescriptor; }
+ uint64_t getSlot() const { return m_slotDescriptor; }
+ uint64_t getSlotSize() const { return m_slotSize; }
TraceCache* getTraceCache() { return m_pTraceCache; }
private:
@@ -99,10 +119,40 @@
unsigned m_origInst; // Instruction replaced by phase 2
uint64_t m_replaceAddr; // Address at which to restore original inst
uint64_t m_slotDescriptor; // Slot created by phase 2
+ unsigned m_slotSize; // Size of slot created by phase 2
TraceCache* m_pTraceCache; // TraceCache instance used by phase 2
};
+class Phase4Info
+{
+ public:
+ Phase4Info(const InstCandidate& candidate,
+ uint64_t slotDescriptor,
+ uint64_t slotSize,
+ TraceCache* pTraceCache):
+ m_candidate(candidate),
+ m_slotDescriptor(slotDescriptor),
+ m_slotSize(slotSize),
+ m_pTraceCache(pTraceCache)
+ {
+ }
+
+ const InstCandidate& getCandidate() const { return m_candidate; }
+ uint64_t getSlot() const { return m_slotDescriptor; }
+ uint64_t getSlotSize() const { return m_slotSize; }
+ TraceCache* getTraceCache() { return m_pTraceCache; }
+
+ private:
+ Phase4Info() {}
+
+ InstCandidate m_candidate; // Candidate responsible for this instance's creation
+ uint64_t m_slotDescriptor; // Slot created by phase 3
+ unsigned m_slotSize; // Size of slot created by phase 3
+ TraceCache* m_pTraceCache; // TraceCache instance used by phases 2 and 3
+};
+
void phase3(Phase3Info* p3info);
+void phase4(uint64_t tag, Phase4Info* p4info);
// Phase2 is the class that is responsible for effecting the core of the phase 2
// transformation; the global function phase2() is simply an C-linkage interface.
@@ -130,16 +180,19 @@
{
public:
Phase3(Phase3Info* p3info);
+ ~Phase3();
+
void transform();
private:
Phase3(): m_instManip(0) {}
- uint64_t m_startAddr;
- uint64_t m_endAddr;
+ void processCandidates(vector<InstCandidate>& candidates);
+ inline unsigned getSlotSize(InstCandidate&) const;
+
+ Phase3Info* m_pPhase3Info;
TraceCache* m_pTraceCache;
InstManip m_instManip;
- uint64_t m_slotDescriptor;
};
@@ -197,21 +250,6 @@
void Phase2::transformFunction(AddressRange& range)
{
- ////////////////
- // 1. Replace the first (replacable) instruction in F with a branch to a new slot
- // (annulling bit should specify *not* to execute the branch delay slot) in the dummy
- // function.
- //
- // 2. In the slot, write:
- //
- // - The code to load the pointer to the heap-allocated Phase3Info instance.
- //
- // - The call to phase 3
- //
- // - The branch back to the location of the replaced instruction (phase 3 will
- // replace the instruction at runtime).
- //
-
// Obtain address of first replacable instruction in function and obtain a new slot from
// the TraceCache memory manager (i.e., a new slot in the dummy function).
@@ -230,7 +268,8 @@
// register, which will be used as a parameter to the phase3 call, b) the call to
// phase 3 itself, and c) the direct branch back to the original code.
- Phase3Info* p3info = new Phase3Info(range, origInst, repInstAddr, slotBase, m_pTraceCache);
+ Phase3Info* p3info = new Phase3Info(range, origInst, repInstAddr,
+ slotBase, getSlotSize(), m_pTraceCache);
vector<unsigned> snippet;
m_instManip.generateLoad((uint64_t) p3info, snippet);
@@ -244,14 +283,8 @@
unsigned Phase2::getSlotSize() const
{
- // A slot used by phase 2 looks like:
- // +------------------------------+
- // | load parameter for phase 3 |
- // | call to phase 3 |
- // | nop |
- // | branch back to orig code |
- // | nop |
- // +------------------------------+
+ // The following sum corresponds to the sizes consumed by the various regions of the
+ // phase 2 slot. See picture of phase 2 contents for details.
return m_instManip.getGenLoadSize() +
m_instManip.getGenCallSize() +
@@ -263,10 +296,27 @@
void phase3(Phase3Info* p3info)
{
Phase3 p3(p3info);
- p3info = 0;
-
p3.transform();
+}
+
+Phase3::Phase3(Phase3Info* p3info):
+ m_pPhase3Info(p3info),
+ m_pTraceCache(p3info->getTraceCache()),
+ m_instManip(p3info->getTraceCache()->getVM())
+{
+ cerr << "================ Begin Phase 3 [" << std::hex
+ << m_pPhase3Info->getStartAddr() << ", " << m_pPhase3Info->getEndAddr()
+ << "] ================\n";
+
+ // 1. Replace the original (replaced) instruction at the proper location in the
+ // original code (thus effectively removing the branch to the slot created by phase 2
+ // as well).
+
+ m_pTraceCache->getVM()->writeInstToVM(p3info->getReplaceAddr(), p3info->getOrigInst());
+}
+Phase3::~Phase3()
+{
// Deallocate the originating slot (i.e. the slot that invoked us).
//
// NB: Yes, we are, in fact, deallocating a memory segment (i.e., the slot obtained by
@@ -275,36 +325,107 @@
// write to it. However, it does indeed pose a problem for multi-threaded codes. A
// modification to the general mechanism itself is required to achieve thread-safety.
- // (TODO)
+ cerr << "About to deallocate phase2-created slot" << endl;
+
+ uint64_t slotBase = m_pPhase3Info->getSlot();
+ unsigned slotSize = m_pPhase3Info->getSlotSize();
+ m_pTraceCache->getMemMgr()->freeTraceMemory(slotBase, slotSize);
+
+ // Deallocate the parameter structure
+ delete m_pPhase3Info;
}
-Phase3::Phase3(Phase3Info* p3info):
- m_instManip(p3info->getTraceCache()->getVM())
+void Phase3::processCandidates(vector<InstCandidate>& candidates)
{
- assert(p3info && "phase3 requires valid Phase3Info ptr");
+ // For each load candidate, obtain a new slot and write the phase 3 slot region
+ // contents into it. See diagram in comments at top of file for more info.
- m_startAddr = p3info->getStartAddr();
- m_endAddr = p3info->getEndAddr();
- m_pTraceCache = p3info->getTraceCache();
- m_slotDescriptor = p3info->getSlot();
-
- cerr << "================ Begin Phase 3 [" << std::hex
- << m_startAddr << ", " << m_endAddr
- << "] ================\n";
+ for(vector<InstCandidate>::iterator i = candidates.begin(), e = candidates.end(); i != e; ++i) {
+ cerr << "Transforming " << *i << endl;
- // Restore the replaced instruction to its original location (thus effectively
- // removing the branch to the slot created by phase 2 as well)
- m_pTraceCache->getVM()->writeInstToVM(p3info->getReplaceAddr(), p3info->getOrigInst());
+ uint64_t slotBase = m_pTraceCache->getMemMgr()->getMemory(getSlotSize(*i));
+ assert(slotBase && "Unable to obtain memory from MemoryManger instance");
+
+ // Replace load candidate instruction with a branch to start of slot.
+ VirtualMem* vm = m_pTraceCache->getVM();
+ uint64_t loadAddr = i->front().first;
+ vm->writeInstToVM(loadAddr, m_instManip.getBranchAlways(slotBase, loadAddr));
+
+ // Generate a) code to save the registers, b) instruction(s) to store the load
+ // source address into a phase4 parameter register, c) the load of (the
+ // pointer-to) the heap-allocated Phase4Info structure into a phase4 parameter
+ // register, and d) code to call phase 3, restore regs, and branch back to
+ // original code.
+
+ Phase4Info* p4info = new Phase4Info(*i, slotBase, getSlotSize(*i), m_pTraceCache);
+
+ vector<unsigned> snippet;
+ m_instManip.generateSave(snippet);
+ m_instManip.generateAddressCopy(i->front().second, snippet);
+ m_instManip.generateLoad((uint64_t) p4info, snippet, InstManip::REG_1);
+ m_instManip.generateCall((uint64_t) &phase4, slotBase, snippet);
+ m_instManip.generateRestore(snippet);
+ m_instManip.generateBranchAlways(i->front().first, slotBase, snippet);
+
+ // Dump snippet instructions:
+
+ cerr << "phase4 slot instructions:" << endl;
+
+ for(vector<unsigned>::iterator j = snippet.begin(), k = snippet.end(); j != k; ++j) {
+ m_instManip.printInst(*j);
+ cerr << endl;
+ }
- // Deallocate the parameter structure
- delete p3info;
+ // Copy the snippet code into the slot
+ assert(snippet.size() == getSlotSize(*i) && "Snippet size does not match slot size");
+ copySnippetToSlot(snippet, slotBase, vm, &m_instManip);
+
+ // just one candidate for now
+ break;
+ }
+}
+
+unsigned Phase3::getSlotSize(InstCandidate& cand) const
+{
+ // The following sum corresponds to the sizes consumed by the various regions of the
+ // phase 3 slot. See picture of phase 3 contents for details.
+
+ return m_instManip.getGenSaveSize() +
+ m_instManip.getAddressCopySize(cand.front().second) +
+ m_instManip.getGenLoadSize() +
+ m_instManip.getGenCallSize() +
+ m_instManip.getGenRestoreSize() +
+ m_instManip.getGenBranchAlwaysSize();
}
void Phase3::transform()
{
- // Gather up the instruction candidates within the function we to transform.
+ // 2. Analyze the function and determine the load-volatile candidates...
vector<InstCandidate> candidates;
- m_instManip.findCandidates(m_startAddr, m_endAddr, candidates);
+ m_instManip.findCandidates(m_pPhase3Info->getStartAddr(),
+ m_pPhase3Info->getEndAddr(),
+ candidates);
+ // ...and process them
+ processCandidates(candidates);
cerr << "============================== End Phase 3 ==============================\n";
+}
+
+//////////////// Phase4 implementation ////////////////
+
+void phase4(uint64_t tag, Phase4Info* p4info)
+{
+ cerr << "phase4 invoked!" << endl;
+
+ cerr << "tag is " << std::hex << tag << endl;
+
+ cerr << "inst candidate inside info structure is: " << endl;
+ cerr << p4info->getCandidate() << endl;
+
+ // (TEMP) For now, restore the candidate load to its original position for debugging
+ // purposes.
+
+ p4info->getTraceCache()->getVM()->writeInstToVM(p4info->getCandidate().front().first,
+ p4info->getCandidate().front().second);
+ delete p4info;
}
More information about the llvm-commits
mailing list