[llvm-commits] CVS: llvm/lib/Reoptimizer/Inst/lib/InstManip.h PhaseInfo.h Phases.cpp SparcInstManip.cpp SparcInstManip.h design.txt
Joel Stanley
jstanley at cs.uiuc.edu
Mon May 12 21:01:48 PDT 2003
Changes in directory llvm/lib/Reoptimizer/Inst/lib:
InstManip.h updated: 1.16 -> 1.17
PhaseInfo.h updated: 1.4 -> 1.5
Phases.cpp updated: 1.26 -> 1.27
SparcInstManip.cpp updated: 1.6 -> 1.7
SparcInstManip.h updated: 1.7 -> 1.8
design.txt updated: 1.13 -> 1.14
---
Log message:
Final checkin before abandoning the alloca approach altogether. Present for posterity.
---
Diffs of the changes:
Index: llvm/lib/Reoptimizer/Inst/lib/InstManip.h
diff -u llvm/lib/Reoptimizer/Inst/lib/InstManip.h:1.16 llvm/lib/Reoptimizer/Inst/lib/InstManip.h:1.17
--- llvm/lib/Reoptimizer/Inst/lib/InstManip.h:1.16 Thu May 8 11:27:25 2003
+++ llvm/lib/Reoptimizer/Inst/lib/InstManip.h Mon May 12 21:00:22 2003
@@ -26,6 +26,8 @@
#include <map>
#include <vector>
+#include "Phase1/Intraphase.h"
+
class TraceCache;
class Phase2;
class Phase3;
@@ -33,7 +35,7 @@
class InstCandidate;
class Phase3Info;
class Phase4Info;
-struct PrimInfo;
+struct GBTElem;
class InstManip
{
@@ -69,7 +71,7 @@
std::vector<unsigned>& snippet) = 0;
// For the phase 5 slot
- virtual void buildSlot(PrimInfo* pi,
+ virtual void buildSlot(GBTElem* gbte,
uint64_t slotBase,
uint64_t instAddr,
const std::pair<uint64_t, uint64_t>& extents,
@@ -77,7 +79,7 @@
virtual unsigned getSlotSize(Phase2* p2) const = 0;
virtual unsigned getSlotSize(Phase3* p3, InstCandidate& cand) const = 0;
- virtual unsigned getSlotSize(Phase4* p4) const = 0;
+ virtual unsigned getSlotSize(Phase4* p4, pp::GBTEntryType type) const = 0;
// findCandidates - Build the vector of instruction candidates that occur in the
// region defined by the given addresses. This is necessarily a platform-dependent
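For orientation, here is a minimal, self-contained sketch (not the committed header) of how the two virtuals touched above read once GBTElem and the entry-type parameter are in place. The class name, forward declarations, and enum numbering below are illustrative stand-ins; the real pp::GBTEntryType comes from Phase1/Intraphase.h, which is not included in this mail.

    // Sketch only -- mirrors the interface change in the InstManip.h hunks above.
    #include <stdint.h>
    #include <utility>
    #include <vector>

    struct GBTElem;                       // defined in PhaseInfo.h (next hunk)
    class Phase4;
    namespace pp { enum GBTEntryType { GBT_INTERVAL_START, GBT_INTERVAL_END }; }

    class InstManipSketch
    {
    public:
        virtual ~InstManipSketch() {}

        // Phase 5 slot construction is now driven by the GBT entry itself...
        virtual void buildSlot(GBTElem* gbte,
                               uint64_t slotBase,
                               uint64_t instAddr,
                               const std::pair<uint64_t, uint64_t>& extents,
                               std::vector<unsigned>& snippet) = 0;

        // ...and phase 4 slot sizing takes the entry type, since start- and
        // end-interval slots need not emit the same number of instructions.
        virtual unsigned getSlotSize(Phase4* p4, pp::GBTEntryType type) const = 0;
    };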
Index: llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h
diff -u llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h:1.4 llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h:1.5
--- llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h:1.4 Fri May 9 23:01:50 2003
+++ llvm/lib/Reoptimizer/Inst/lib/PhaseInfo.h Mon May 12 21:00:22 2003
@@ -15,18 +15,20 @@
#define DEBUG 1
#if DEBUG
-#define DEBUG_MSG(x) std::cerr << x
+#define VERBOSE 1
+#define DEBUG_MSG(v, x) if(VERBOSE >= v) std::cerr << x
#else
-#define DEBUG_MSG(x)
+#define DEBUG_MSG(v, x)
#endif
typedef std::pair<uint64_t, uint64_t> AddressRange;
-typedef struct PrimInfo {
+typedef struct GBTElem {
unsigned gbtType;
unsigned short* loadVar;
unsigned gbtStartIdx;
unsigned paramSize;
+ void* instFunc;
};
class Phase3Info
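A compact, compilable sketch of how the new two-argument DEBUG_MSG macro and the renamed GBTElem entry are meant to be used, assembled from the hunks above. The field comments and example values are editorial guesses at intent, not part of the checkin.

    // Sketch: the verbosity-aware DEBUG_MSG and the GBTElem layout from PhaseInfo.h.
    #include <iostream>

    #define DEBUG 1
    #if DEBUG
    #define VERBOSE 1
    #define DEBUG_MSG(v, x) if(VERBOSE >= v) std::cerr << x
    #else
    #define DEBUG_MSG(v, x)
    #endif

    struct GBTElem {
        unsigned        gbtType;      // a pp::GBTEntryType value (start/end interval, ...)
        unsigned short* loadVar;      // tag of the load-volatile marking the site
        unsigned        gbtStartIdx;
        unsigned        paramSize;    // bytes of parameter memory for the inst function
        void*           instFunc;     // instrumentation function invoked from phase 5
    };

    int main() {
        GBTElem e = { 0, 0, 0, 16, 0 };                        // illustrative values
        DEBUG_MSG(1, "paramSize == " << e.paramSize << "\n");  // printed when VERBOSE >= 1
        DEBUG_MSG(2, "only shown when VERBOSE >= 2\n");        // suppressed at VERBOSE == 1
        return 0;
    }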
Index: llvm/lib/Reoptimizer/Inst/lib/Phases.cpp
diff -u llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.26 llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.27
--- llvm/lib/Reoptimizer/Inst/lib/Phases.cpp:1.26 Fri May 9 23:01:50 2003
+++ llvm/lib/Reoptimizer/Inst/lib/Phases.cpp Mon May 12 21:00:23 2003
@@ -2,7 +2,7 @@
// programmer: Joel Stanley
// date: Fri Apr 4 16:59:48 CST 2003
// fileid: Phases.cpp
-// purpose: Implements various runtime phases of the peformance-oriented language
+// purpose: Implements runtime phases 2-5 of the peformance-oriented language
// extensions.
//
// PHASE 2:
@@ -78,7 +78,7 @@
// obtained in the same manner.
extern unsigned ppGBTSize;
-extern PrimInfo ppGBT[];
+extern GBTElem ppGBT[];
typedef std::pair<uint64_t, uint64_t> AddressRange;
@@ -161,10 +161,10 @@
void Phase2::transform()
{
- DEBUG_MSG("============================== Begin Phase 2 ==============================\n");
+ DEBUG_MSG(1, "============================== Begin Phase 2 ==============================\n");
const char* execName = getexecname();
- DEBUG_MSG("Executable name is: " << execName << endl);
+ DEBUG_MSG(1, "Executable name is: " << execName << endl);
ElfReader elfReader(execName);
@@ -179,7 +179,7 @@
while(elfReader.findNextSymbol(funcName, range, m_pIM->getInstWidth()))
funcs.push_back(std::make_pair(funcName, range));
- DEBUG_MSG("There are " << funcs.size() << " functions to process." << endl);
+ DEBUG_MSG(1, "There are " << funcs.size() << " functions to process." << endl);
m_pIM->makePhase3SpillRegion(funcs.size());
@@ -191,13 +191,13 @@
//cerr << i->first << " is to be transformed" << endl;
if(i->first == "fibs") {
- DEBUG_MSG("Transforming function " << i->first << "...\n");
+ DEBUG_MSG(1, "Transforming function " << i->first << "...\n");
transformFunction(i->second);
}
}
}
- DEBUG_MSG("============================== End Phase 2 ===========================\n");
+ DEBUG_MSG(1, "============================== End Phase 2 ===========================\n");
}
@@ -262,8 +262,8 @@
vector<unsigned> snippet;
m_pIM->buildSlot(p3info, snippet);
-#if DEBUG
- DEBUG_MSG("phase3 slot instructions:\n");
+ DEBUG_MSG(2, "phase3 slot instructions:\n");
+#if VERBOSE > 1
dumpSnippet(snippet, m_pIM);
#endif
@@ -284,7 +284,7 @@
m_pTC(p3info->getTraceCache()),
m_pIM(p3info->getIM())
{
- DEBUG_MSG("================ Begin Phase 3 [" << std::hex
+ DEBUG_MSG(1, "================ Begin Phase 3 [" << std::hex
<< m_pPhase3Info->getRange().first << ", "
<< m_pPhase3Info->getRange().second
<< "] ================\n");
@@ -319,26 +319,26 @@
// For each load candidate, obtain a new slot and write the phase 4 slot region
// contents into it.
- DEBUG_MSG("There are " << candidates.size() << " candidates to process\n");
+ DEBUG_MSG(1, "There are " << candidates.size() << " candidates to process\n");
- for(vector<InstCandidate>::iterator i = candidates.begin(), e = candidates.end(); i != e; ++i) {
- DEBUG_MSG("Transforming " << *i << endl);
- unsigned slotSize = m_pIM->getSlotSize(this, *i);
+ for(unsigned i = 0, e = candidates.size(); i < e; ++i) {
+ DEBUG_MSG(1, "Transforming " << candidates[i] << endl);
+ unsigned slotSize = m_pIM->getSlotSize(this, candidates[i]);
// Replace load candidate instruction with a branch to the start of a new slot.
- uint64_t slotBase = replaceInstWithBrToSlot(i->front().first, slotSize,
+ uint64_t slotBase = replaceInstWithBrToSlot(candidates[i].front().first, slotSize,
m_pTC, m_pIM);
// Build the Phase4Info structure and generate the phase 4 slot.
- Phase4Info* p4info = new Phase4Info(*i, m_pPhase3Info->getRange(),
+ Phase4Info* p4info = new Phase4Info(candidates[i], m_pPhase3Info->getRange(),
slotBase, slotSize, m_pTC, m_pIM);
vector<unsigned> snippet;
m_pIM->buildSlot(p4info, snippet);
-#if DEBUG
- DEBUG_MSG("phase4 slot instructions:\n");
+ DEBUG_MSG(2, "phase4 slot instructions:\n");
+#if VERBOSE > 1
dumpSnippet(snippet, m_pIM);
#endif
@@ -346,7 +346,7 @@
copySnippetToSlot(snippet, slotBase, m_pTC->getVM(), m_pIM);
// just one candidate for now
- break;
+ //break;
}
}
@@ -358,7 +358,7 @@
// ...and process them
processCandidates(candidates);
- DEBUG_MSG("============================== End Phase 3 ==============================\n");
+ DEBUG_MSG(1, "============================== End Phase 3 ==============================\n");
}
//////////////// Phase4 implementation ////////////////
@@ -375,7 +375,7 @@
m_pIM(p4info->getIM()),
m_tag(tag)
{
- DEBUG_MSG("================ Begin Phase 4 ================\n");
+ DEBUG_MSG(1, "================ Begin Phase 4 ================\n");
}
Phase4::~Phase4()
@@ -407,7 +407,7 @@
}
}
-static PrimInfo* searchGBT(uint64_t tag)
+static GBTElem* searchGBT(uint64_t tag)
{
// Traverse the GBT and determine if the tag is there.
for(unsigned i = 0; i < ppGBTSize; ++i) {
@@ -421,13 +421,13 @@
void Phase4::transform()
{
-#if DEBUG
- //dumpGBT(cerr);
- DEBUG_MSG("tag is " << m_tag << ", and ");
+#if VERBOSE > 0
+ dumpGBT(cerr);
#endif
+ DEBUG_MSG(1, "tag is " << m_tag << ", and ");
- if(PrimInfo* pi = searchGBT(m_tag)) {
- DEBUG_MSG("matches.\n");
+ if(GBTElem* gbte = searchGBT(m_tag)) {
+ DEBUG_MSG(1, "matches.\n");
const InstCandidate& cand = m_pPhase4Info->getCandidate();
assert(cand.getInsts().size() >= 2
@@ -444,15 +444,15 @@
// Obtain memory (& rewrite branch) to the phase 5 slot.
- unsigned slotSize = m_pIM->getSlotSize(this);
+ unsigned slotSize = m_pIM->getSlotSize(this, (pp::GBTEntryType) gbte->gbtType);
uint64_t repAddr = cand.front().first;
uint64_t slotBase = replaceInstWithBrToSlot(repAddr, slotSize, m_pTC, m_pIM);
vector<unsigned> snippet;
- m_pIM->buildSlot(pi, slotBase, repAddr, m_pPhase4Info->getRange(), snippet);
+ m_pIM->buildSlot(gbte, slotBase, repAddr, m_pPhase4Info->getRange(), snippet);
-#if DEBUG
- DEBUG_MSG("phase 5 slot contents:\n");
+ DEBUG_MSG(2, "phase 5 slot contents:\n");
+#if VERBOSE > 1
dumpSnippet(snippet, m_pIM);
#endif
@@ -470,7 +470,7 @@
#endif
}
else {
- DEBUG_MSG("does not match\n");
+ DEBUG_MSG(1, "does not match\n");
// The candidate failed to get elected, so pack up and go home. Restore the
// replaced instruction (i.e. the branch that invoked this code) with the original
// instruction at that location.
@@ -488,15 +488,36 @@
m_pPhase4Info->getCandidate().front().second);
#endif
- DEBUG_MSG("================ End Phase 4 ================\n");
+ DEBUG_MSG(1, "================ End Phase 4 ================\n");
}
//////////////// Phase 5 implementation ////////////////
-void phase5(PrimInfo* pi, void* paramMem)
+void phase5(GBTElem* gbte, void* paramMem, void* startParamMem)
{
- DEBUG_MSG("phase5 function invoked\n");
- DEBUG_MSG("pi->paramSize == " << pi->paramSize << endl);
- DEBUG_MSG("pi->loadVar (tag) == " << pi->loadVar << endl);
- DEBUG_MSG("phase 5 function exiting\n");
+ DEBUG_MSG(1, "phase5 function invoked\n");
+ DEBUG_MSG(1, "gbte->gbtType == " << gbte->gbtType << endl);
+ DEBUG_MSG(1, "gbte->paramSize == " << gbte->paramSize << endl);
+ DEBUG_MSG(1, "gbte->loadVar (tag) == " << gbte->loadVar << endl);
+ DEBUG_MSG(1, "Calling instrumentation function...\n");
+
+ switch(gbte->gbtType){
+ case pp::GBT_INTERVAL_START: {
+ DEBUG_MSG(1, "paramMem address is " << paramMem << endl);
+ DEBUG_MSG(1, "sp+BIAS+off is " << startParamMem << endl);
+ void (*instFunc)(void*) = (void (*)(void*)) gbte->instFunc;
+ instFunc(paramMem);
+ break;
+ }
+ case pp::GBT_INTERVAL_END: {
+ DEBUG_MSG(1, "paramMem address is " << paramMem << endl);
+ DEBUG_MSG(1, "sp + BIAS + off address is " << startParamMem << endl);
+ //DEBUG_MSG(1, "startParamMem address is " << startParamMem << endl);
+ //void (*instFunc)(void*, void*) = (void (*)(void*, void*)) gbte->instFunc;
+ //instFunc(paramMem, startParamMem);
+ break;
+ }
+ }
+
+ DEBUG_MSG(1, "phase 5 function exiting\n");
}
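The phase5 change above dispatches on gbte->gbtType; the following standalone sketch shows that dispatch shape in isolation. The GBTElem stand-in, the enum numbering, and the startInst function are assumptions for illustration, and the end-interval call is left commented out because it is still disabled in this checkin.

    // Sketch of the phase 5 dispatch on GBT entry type (see Phases.cpp hunks above).
    #include <cstdio>

    struct GBTElem {            // stand-in for the PhaseInfo.h definition
        unsigned gbtType;
        void*    instFunc;
    };

    namespace pp { enum GBTEntryType { GBT_INTERVAL_START, GBT_INTERVAL_END }; }

    void phase5Sketch(GBTElem* gbte, void* paramMem, void* startParamMem) {
        switch(gbte->gbtType) {
        case pp::GBT_INTERVAL_START: {
            // Start sites receive only their own parameter memory.
            void (*instFunc)(void*) = (void (*)(void*)) gbte->instFunc;
            instFunc(paramMem);
            break;
        }
        case pp::GBT_INTERVAL_END: {
            // End sites would also receive the start site's parameter memory;
            // the real call is still commented out in this checkin.
            // void (*f)(void*, void*) = (void (*)(void*, void*)) gbte->instFunc;
            // f(paramMem, startParamMem);
            (void) startParamMem;
            break;
        }
        }
    }

    static void startInst(void* paramMem) { std::printf("start inst: %p\n", paramMem); }

    int main() {
        GBTElem e;
        e.gbtType  = pp::GBT_INTERVAL_START;
        e.instFunc = (void*) &startInst;
        char params[16];
        phase5Sketch(&e, params, 0);
        return 0;
    }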
Index: llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp
diff -u llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp:1.6 llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp:1.7
--- llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp:1.6 Fri May 9 23:01:50 2003
+++ llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.cpp Mon May 12 21:00:23 2003
@@ -42,7 +42,7 @@
// | alloc spill area/reg save/inst param region on stack |
// | manually-save clobbered registers |
// | spill shared registers |
-// | copy PrimInfo ptr to param 1 |
+// | copy GBTElem ptr to param 1 |
// | copy spill area addr to param 2 |
// | call phase 5 |
// | nop |
@@ -77,7 +77,7 @@
void phase3(Phase3Info* p3info);
void phase4(uint64_t tag, Phase4Info* p4info);
-void phase5(PrimInfo* pi, void* paramMem);
+void phase5(GBTElem* gbte, void* paramMem, void* startParamMem);
SparcInstManip::SparcInstManip(TraceCache* tc):
InstManip(tc, SHARED_SIZE, INST_WIDTH, NOP_INST),
@@ -168,67 +168,23 @@
"Snippet size does not match expected slot size");
}
-void SparcInstManip::buildSlot(PrimInfo* pi,
+void SparcInstManip::buildSlot(GBTElem* gbte,
uint64_t slotBase,
uint64_t instAddr,
const std::pair<uint64_t, uint64_t>& extents,
std::vector<unsigned>& snippet)
{
- // Before we generate code to spill the shared registers, we must first search in the
- // vicinity of the instrumentation site (i.e., the branch to the slot, formerly the
- // load-volatile) to discover a marker alloca that will tell us the correct offset in
- // the current stack frame. The search boundaries are given by the 'extents' pair,
- // which is the address range of the enclosing function.
-
- unsigned offset = findAllocaOffset(instAddr, extents);
- unsigned sharedSize = WORD_WIDTH * getSharedSize();
- unsigned stkSize = sharedSize + WORD_WIDTH * 2 + pi->paramSize;
-
- if(stkSize % STACK_ALIGN != 0)
- cerr << "Warning: not multiple of " << STACK_ALIGN << endl;
-
- DEBUG_MSG("buildSlot(p5) obtained offset " << std::dec
- << offset << std::hex << endl);
-
- // After our alloca'd stack region looks like:
- // sp + BIAS + stkSize -> +--------------------------------+
- // | inst function parameter memory | } pi->paramSize
- // +--------------------------------+
- // | save area for clobbered regs | } WORD_WIDTH * 2
- // +--------------------------------+
- // | spill region for shared regs | } sharedSize
- // sp + BIAS + offset -> +--------------------------------+
-
- // TODO: ensure that stack size is aligned properly
-
- startCode(snippet);
-
- generateAlloca(stkSize);
-
- // "Manually" save REG_0, REG_1
- generateStackStore(REG_0, offset + sharedSize);
- generateStackStore(REG_1, offset + sharedSize + WORD_WIDTH);
-
- generateSpillShared(offset);
-
- generateLoad((uint64_t) pi, REG_0, REG_1); // REG_0 live to call
- generateStackStore(REG_0, PARAM_0);
-
- generateSPLoad(REG_1, offset + stkSize - pi->paramSize); // REG_1 live to call
- generateStackStore(REG_1, PARAM_1);
-
- generateCall((uint64_t) &phase5, slotBase);
- generateRestoreShared(offset);
-
- // "Manually" restore REG_0, REG_1
- generateStackLoad(REG_0, offset + sharedSize);
- generateStackLoad(REG_1, offset + sharedSize + WORD_WIDTH);
-
- // We need to branch back to one instruction beyond the branch to the phase 5 slot.
- generateBranchAlways(instAddr + getInstWidth(), slotBase, getNOP());
- endCode();
-
- // TODO: Add assert against against the snippet size.
+ switch(gbte->gbtType) {
+ case pp::GBT_INTERVAL_START:
+ buildStartIntervalSlot(gbte, slotBase, instAddr, extents, snippet);
+ return;
+ case pp::GBT_INTERVAL_END:
+ buildEndIntervalSlot(gbte, slotBase, instAddr, extents, snippet);
+ return;
+ default:
+ assert(0 && "Unhandled gbtType encountered");
+ return;
+ }
}
unsigned SparcInstManip::getSlotSize(Phase2* p2) const
@@ -266,7 +222,7 @@
GEN_BRANCH_ALWAYS_SIZE;
}
-unsigned SparcInstManip::getSlotSize(Phase4* p4) const
+unsigned SparcInstManip::getSlotSize(Phase4* p4, pp::GBTEntryType type) const
{
// The following sum corresponds to the sizes consumed by the various regions of the
// the slot constructed by phase 4, called the phase 5 slot. See ASCII diagram of
@@ -280,8 +236,11 @@
GEN_SPL_STK_SIZE +
GEN_LOAD_SIZE +
GEN_STKSTORE_SIZE +
- GEN_SPLOAD_SIZE +
+ GEN_SPOFFSET_SIZE +
GEN_STKSTORE_SIZE +
+ // FIXME
+ //(type == pp::GBT_INTERVAL_END) ?
+ // GEN_SPOFFSET_SIZE + GEN_STKSTORE_SIZE : 0) +
GEN_CALL_SIZE +
GEN_STKSTORE_SIZE +
GEN_UNSPL_STK_SIZE +
@@ -338,18 +297,20 @@
fflush(stdout);
}
-void SparcInstManip::generateSPLoad(LogicalRegister reg, unsigned offset)
+void SparcInstManip::generateSPOffset(LogicalRegister reg, unsigned offset)
{
- // Loads the value of %sp + offset into reg
+ // Loads the value of %sp + BIAS + offset into reg
assert(m_pCurrSnippet && "Invalid snippet for code generation");
unsigned initSize = m_pCurrSnippet->size();
- m_pCurrSnippet->push_back(MK_ADD_R_I(m_logicalToActualReg[reg], R_O6, offset));
+ m_pCurrSnippet->push_back(MK_ADD_R_I(m_logicalToActualReg[reg], R_O6, offset + BIAS));
- assert(m_pCurrSnippet->size() - initSize == GEN_SPLOAD_SIZE &&
+ assert(m_pCurrSnippet->size() - initSize == GEN_SPOFFSET_SIZE &&
"Unexpected number of instructions in code sequence for SP load");
}
+// generateAlloca - Generate code to allocate 'size' bytes on the stack
+
void SparcInstManip::generateAlloca(unsigned size)
{
assert(m_pCurrSnippet && "Invalid snippet for code generation");
@@ -537,8 +498,8 @@
void SparcInstManip::generateRestoreShared(unsigned offset)
{
- // Un-spill from the stack -- assumes %sp + BIAS + offset points to a valid stack
- // location.
+ // Generate code to un-spill the shared registers from the memory at
+ // %sp + BIAS + offset. Assumes %sp + BIAS + offset points to a valid stack location.
assert(m_pCurrSnippet && "Invalid snippet for code generation");
unsigned initSize = m_pCurrSnippet->size();
@@ -553,6 +514,9 @@
LogicalRegister tmp1,
LogicalRegister tmp2)
{
+ // Generate code to un-spill the shared registers from restoreFromAddr, using tmp1 and
+ // tmp2 as temporary registers
+
assert(m_pCurrSnippet && "Invalid snippet for code generation");
assert(tmp1 != tmp2 && "Distinct logical registers required");
@@ -582,9 +546,9 @@
void SparcInstManip::generateSpillShared(unsigned offset)
{
- // Spill to the stack -- assumes %sp + BIAS + offset points to a valid stack location,
- // and that there is sufficient valid memory at %sp + BIAS + offset for the entire
- // spill size.
+ // Spill the shared registers to the stack (i.e. the memory at %sp + BIAS + offset).
+ // Assumes %sp + BIAS + offset points to a valid stack location, and that there is
+ // sufficient valid memory at %sp + BIAS + offset for the entire spill size.
assert(m_pCurrSnippet && "Invalid snippet for code generation");
unsigned initSize = m_pCurrSnippet->size();
@@ -599,6 +563,9 @@
LogicalRegister tmp1,
LogicalRegister tmp2)
{
+ // Generate code to spill the shared registers to spillFromAddr, using tmp1 and tmp2
+ // as temporary registers
+
assert(m_pCurrSnippet && "Invalid snippet for code generation");
assert(tmp1 != tmp2 && "Distinct logical registers required");
@@ -860,21 +827,22 @@
if(0 == (imm - BIAS) % STACK_ALIGN) {
offset = imm - BIAS;
- DEBUG_MSG("Alloca marker case (a)\n");
+ DEBUG_MSG(1, "Alloca marker case (a)\n");
}
else if(0 == (imm - BIAS - STACK_ALIGN + 1) % STACK_ALIGN) {
offset = imm - BIAS - STACK_ALIGN + 1;
- DEBUG_MSG("Alloca marker case (b)\n");
+ DEBUG_MSG(1, "Alloca marker case (b)\n");
}
else
assert(0 && "Alloca special cases failed, need fallback implementation");
-#if DEBUG
- DEBUG_MSG("Found alloca marker: ");
+ DEBUG_MSG(1, "Found alloca marker: ");
+#if VERBOSE > 0
sparc_print(inst);
fflush(stdout);
- DEBUG_MSG(endl);
#endif
+ DEBUG_MSG(1, endl);
+
return true;
}
@@ -917,3 +885,163 @@
assert(0 && "Failed to find alloca marker");
return 0;
}
+
+void SparcInstManip::buildStartIntervalSlot(GBTElem* gbte,
+ uint64_t slotBase,
+ uint64_t instAddr,
+ const std::pair<uint64_t, uint64_t>& extents,
+ std::vector<unsigned>& snippet)
+{
+ // Before we generate code to spill the shared registers, we must first search in the
+ // vicinity of the instrumentation site (i.e., the branch to the slot, formerly the
+ // load-volatile) to discover a marker alloca that will tell us the correct offset in
+ // the current stack frame. The search boundaries are given by the 'extents' pair,
+ // which is the address range of the enclosing function.
+
+ unsigned offset = findAllocaOffset(instAddr, extents);
+ unsigned sharedSize = WORD_WIDTH * getSharedSize();
+ unsigned stkSize = sharedSize + WORD_WIDTH * 2 + gbte->paramSize;
+
+ if(stkSize % STACK_ALIGN != 0)
+ cerr << "Warning: not multiple of " << STACK_ALIGN << endl;
+
+ DEBUG_MSG(1, "buildStartSlot obtained offset " << std::dec
+ << offset << ", and stack size is " << stkSize << std::hex << endl);
+
+ // After the alloca, our stack region looks like:
+ // sp + BIAS + stkSize -> +--------------------------------+
+ // | inst function parameter memory | } gbte->paramSize
+ // +--------------------------------+
+ // | save area for clobbered regs | } WORD_WIDTH * 2
+ // +--------------------------------+
+ // | spill region for shared regs | } sharedSize
+ // sp + BIAS + offset -> +--------------------------------+
+
+ // TODO: ensure that stack size is aligned properly
+
+ startCode(snippet);
+
+ generateAlloca(stkSize);
+
+ // "Manually" save REG_0, REG_1
+ generateStackStore(REG_0, offset + sharedSize);
+ generateStackStore(REG_1, offset + sharedSize + WORD_WIDTH);
+
+ generateSpillShared(offset);
+
+ generateLoad((uint64_t) gbte, REG_0, REG_1); // REG_0 live to call
+ generateStackStore(REG_0, PARAM_0);
+
+ DEBUG_MSG(1, "param1 (from offset+bias) = " << std::dec
+ << (stkSize - gbte->paramSize)
+ << std::hex << endl);
+
+ generateSPOffset(REG_1, offset + stkSize - gbte->paramSize); // REG_1 live to call
+ generateStackStore(REG_1, PARAM_1);
+
+ //tmp -- store %sp + BIAS + offset in third parameter
+ generateSPOffset(REG_2, offset);
+ generateStackStore(REG_1, PARAM_2);
+ //tmp
+
+ generateCall((uint64_t) &phase5, slotBase);
+ generateRestoreShared(offset);
+
+ // "Manually" restore REG_0, REG_1
+ generateStackLoad(REG_0, offset + sharedSize);
+ generateStackLoad(REG_1, offset + sharedSize + WORD_WIDTH);
+
+ // We need to branch back to one instruction beyond the branch to the phase 5 slot.
+ generateBranchAlways(instAddr + getInstWidth(), slotBase, getNOP());
+ endCode();
+
+ // TODO: Add assert against the snippet size.
+}
+
+void SparcInstManip::buildEndIntervalSlot(GBTElem* gbte,
+ uint64_t slotBase,
+ uint64_t instAddr,
+ const std::pair<uint64_t, uint64_t>& extents,
+ std::vector<unsigned>& snippet)
+{
+ unsigned offset = findAllocaOffset(instAddr, extents);
+ unsigned sharedSize = WORD_WIDTH * getSharedSize();
+ unsigned stkSize = sharedSize + WORD_WIDTH * 2 + gbte->paramSize;
+
+ if(stkSize % STACK_ALIGN != 0)
+ cerr << "Warning: not multiple of STACK_ALIGN" << endl;
+
+ DEBUG_MSG(1, "buildEndSlot obtained offset " << std::dec
+ << offset << ", and stack size is " << stkSize << std::hex << endl);
+
+ // After the alloca, our stack region looks like:
+ // (current implementation assumes no %sp manipulation occurs between start- and
+ // end-region sites)
+ // +--------------------------------+
+ // | ... stk area for start site ...|
+ // sp + BIAS + stkSize -> +--------------------------------+
+ // | inst function param1 memory | } gbte->paramSize
+ // +--------------------------------+
+ // | save area for clobbered regs | } WORD_WIDTH * 2
+ // +--------------------------------+
+ // | spill region for shared regs | } sharedSize
+ // sp + BIAS + offset -> +--------------------------------+
+ //
+
+ // TODO: ensure that stack size is aligned properly
+
+ startCode(snippet);
+
+ generateAlloca(stkSize);
+
+ // "Manually" save REG_0, REG_1
+ generateStackStore(REG_0, offset + sharedSize);
+ generateStackStore(REG_1, offset + sharedSize + WORD_WIDTH);
+
+ generateSpillShared(offset);
+
+ generateLoad((uint64_t) gbte, REG_0, REG_1); // REG_0 live to call
+ generateStackStore(REG_0, PARAM_0);
+
+ DEBUG_MSG(1, "param1 (from offset+bias) = " << std::dec
+ << (stkSize - gbte->paramSize)
+ << std::hex << endl);
+
+ generateSPOffset(REG_1, offset + stkSize - gbte->paramSize); // REG_1 live to call
+ generateStackStore(REG_1, PARAM_1);
+
+ // Generate code for computing the address of stack location where the return value of
+ // the start site is kept (i.e., -paramSize from the top of the start-region stack
+ // region). Note that we have made the KIS concession that no %sp manipulation may
+ // occur in between start- and end-region sites, and that the size of the return
+ // values for the start and end instrumentation functions are equal.
+
+ DEBUG_MSG(1, "param2 (from offset+bias) = " << std::dec
+ << (2 * stkSize - gbte->paramSize)
+ << std::hex << endl);
+
+#if 0
+ generateSPOffset(REG_2, offset + 2 * stkSize - gbte->paramSize);
+ generateStackStore(REG_2, PARAM_2);
+#endif
+
+ //tmp -- store %sp + BIAS + offset in third parameter
+ generateSPOffset(REG_2, offset);
+ generateStackStore(REG_1, PARAM_2);
+ //tmp
+
+ generateCall((uint64_t) &phase5, slotBase);
+ generateRestoreShared(offset);
+
+ // "Manually" restore REG_0, REG_1
+ generateStackLoad(REG_0, offset + sharedSize);
+ generateStackLoad(REG_1, offset + sharedSize + WORD_WIDTH);
+
+ // We need to branch back to one instruction beyond the branch to the phase 5 slot.
+ generateBranchAlways(instAddr + getInstWidth(), slotBase, getNOP());
+ endCode();
+
+ // TODO: Add assert against the snippet size.
+}
+
+
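A small arithmetic sketch of the stack region that buildStartIntervalSlot (and its end-interval twin) carve out with generateAlloca, matching the ASCII diagrams above. WORD_WIDTH mirrors SparcInstManip.h; the STACK_ALIGN and BIAS values and the example register/parameter counts are assumptions for illustration.

    // Sketch of the slot's stack arithmetic; real values come from SparcInstManip.h,
    // getSharedSize(), findAllocaOffset(), and gbte->paramSize.
    #include <cstdio>

    static const unsigned WORD_WIDTH  = 8;     // bytes per 64-bit word
    static const unsigned STACK_ALIGN = 16;    // assumed SparcV9 stack alignment
    static const unsigned BIAS        = 2047;  // SparcV9 stack bias

    int main() {
        unsigned sharedRegs = 16;              // assumed getSharedSize() result
        unsigned paramSize  = 16;              // assumed gbte->paramSize
        unsigned offset     = 0;               // assumed findAllocaOffset() result

        unsigned sharedSize = WORD_WIDTH * sharedRegs;                // spill region
        unsigned stkSize    = sharedSize + WORD_WIDTH * 2 + paramSize;
        if(stkSize % STACK_ALIGN != 0)
            std::fprintf(stderr, "Warning: not multiple of %u\n", STACK_ALIGN);

        // Layout, low to high address (cf. the diagram in buildStartIntervalSlot):
        //   sp + BIAS + offset                        -> shared-register spill region
        //   sp + BIAS + offset + sharedSize           -> save area for REG_0/REG_1
        //   sp + BIAS + offset + stkSize - paramSize  -> inst function parameter memory
        std::printf("stkSize = %u, params at sp + %u + %u\n",
                    stkSize, BIAS, offset + stkSize - paramSize);
        return 0;
    }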
Index: llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h
diff -u llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h:1.7 llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h:1.8
--- llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h:1.7 Fri May 9 23:01:50 2003
+++ llvm/lib/Reoptimizer/Inst/lib/SparcInstManip.h Mon May 12 21:00:23 2003
@@ -14,6 +14,8 @@
#include "InstManip.h"
+#include <map>
+
class SparcInstManip : public InstManip
{
public:
@@ -25,7 +27,7 @@
virtual void buildSlot(Phase4Info* p3info,
std::vector<unsigned>& snippet);
- virtual void buildSlot(PrimInfo* pi,
+ virtual void buildSlot(GBTElem* gbte,
uint64_t slotBase,
uint64_t instAddr,
const std::pair<uint64_t, uint64_t>& extents,
@@ -33,7 +35,7 @@
virtual unsigned getSlotSize(Phase2* p2) const;
virtual unsigned getSlotSize(Phase3* p3, InstCandidate& cand) const;
- virtual unsigned getSlotSize(Phase4* p4) const;
+ virtual unsigned getSlotSize(Phase4* p4, pp::GBTEntryType type) const;
virtual void findCandidates(const std::pair<uint64_t, uint64_t>& range,
std::vector<InstCandidate>& candidates);
@@ -46,7 +48,8 @@
private:
SparcInstManip() {}
- typedef std::map<unsigned, unsigned> OutputToInputRegMap;
+ typedef std::map<unsigned, unsigned> OutputToInputRegMap;
+ typedef std::map<GBTElem*, std::vector<uint64_t> > GBTStackMap;
void startCode(std::vector<unsigned>& snippet) { m_pCurrSnippet = &snippet; }
void endCode() { m_pCurrSnippet = 0; }
@@ -60,10 +63,7 @@
unsigned delaySlotInstr = NOP_INST);
void generateCall(uint64_t dest, uint64_t slotBase);
-
- void generateSPLoad(LogicalRegister reg, unsigned offset);
-
- // generateAlloca - Generate code to allocate 'size' bytes on the stack
+ void generateSPOffset(LogicalRegister reg, unsigned offset);
void generateAlloca(unsigned size);
void generateLoad(uint64_t value,
@@ -72,31 +72,20 @@
void generateStackStore(LogicalRegister src, unsigned stkOffset);
void generateStackLoad(LogicalRegister dest, unsigned stkOffset);
-
void generateRestore();
void generateSave();
- // generateRestoreShared - Generate code to un-spill the shared registers from
- // restoreFromAddr, using tmp1 and tmp2 as temporary registers
-
void generateRestoreShared(uint64_t restoreFromAddr,
LogicalRegister tmp1 = REG_0,
LogicalRegister tmp2 = REG_1);
- // generateRestoreShared - Generate code to un-spill the shared registers from the memory
- // at %sp + BIAS + offset
void generateRestoreShared(unsigned offset);
-
- // generateSpillShared - Generate code to spill the shared registers to spillFromAddr,
- // using tmp1 and tmp2 as temporary registers
+
void generateSpillShared(uint64_t spillFromAddr,
LogicalRegister tmp1 = REG_0,
LogicalRegister tmp2 = REG_1);
- // generateSpillShared - Generate code to spill the shared registers to the memory at
- // %sp + BIAS + offset
void generateSpillShared(unsigned offset);
-
unsigned getRestoreInst() const;
inline unsigned getCallInst(uint64_t dest, uint64_t pc) const;
inline unsigned getGenAddressCopySize(unsigned loadInst) const;
@@ -123,9 +112,22 @@
unsigned findAllocaOffset(uint64_t instAddr,
const std::pair<uint64_t, uint64_t>& range);
+ void buildStartIntervalSlot(GBTElem* gbte,
+ uint64_t slotBase,
+ uint64_t instAddr,
+ const std::pair<uint64_t, uint64_t>& extents,
+ std::vector<unsigned>& snippet);
+
+ void buildEndIntervalSlot(GBTElem* gbte,
+ uint64_t slotBase,
+ uint64_t instAddr,
+ const std::pair<uint64_t, uint64_t>& extents,
+ std::vector<unsigned>& snippet);
+
static bool isAllocaSignature(unsigned inst, unsigned& offset);
- std::vector<unsigned>* m_pCurrSnippet;
+ std::vector<unsigned>* m_pCurrSnippet;
+ GBTStackMap m_gbtStackMap; // Maps GBTElem* -> param address stack
OutputToInputRegMap m_outputToInputReg; // Maps input register -> output register
// Size (in number of 64-bit words) required for storing shared registers
@@ -153,6 +155,7 @@
static const unsigned WORD_WIDTH = 8;
static const unsigned PARAM_0 = 128;
static const unsigned PARAM_1 = PARAM_0 + 8;
+ static const unsigned PARAM_2 = PARAM_1 + 8;
// Fixed sizes of generated SparcV9 assembly snippets
static const unsigned GEN_LOAD_SIZE = 6;
@@ -167,7 +170,7 @@
static const unsigned GEN_UNSPL_SIZE = GEN_SPL_SIZE;
static const unsigned GEN_UNSPL_STK_SIZE = GEN_SPL_STK_SIZE;
static const unsigned GEN_ALLOCA_SIZE = 1;
- static const unsigned GEN_SPLOAD_SIZE = 1;
+ static const unsigned GEN_SPOFFSET_SIZE = 1;
};
unsigned SparcInstManip::getBranchAlways(uint64_t dest, uint64_t pc, bool annul) const
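The new GBTStackMap typedef and m_gbtStackMap member are declared above but not yet exercised in this checkin. The sketch below shows one plausible push/pop discipline for the "stack of effective addresses" discussed in the design.txt e-mail further down; the helper names and the addresses are hypothetical.

    // Hypothetical use of GBTStackMap: one address stack per GBT entry, pushed at
    // the start site's phase 5 invocation and popped at the matching end site.
    #include <cassert>
    #include <map>
    #include <stdint.h>
    #include <vector>

    struct GBTElem {};   // stand-in for the PhaseInfo.h definition

    typedef std::map<GBTElem*, std::vector<uint64_t> > GBTStackMap;

    void pushStartParamAddr(GBTStackMap& m, GBTElem* site, uint64_t paramAddr) {
        m[site].push_back(paramAddr);                // start site: push
    }

    uint64_t popStartParamAddr(GBTStackMap& m, GBTElem* site) {
        std::vector<uint64_t>& stk = m[site];
        assert(!stk.empty() && "end site reached before its start site");
        uint64_t addr = stk.back();                  // end site: read...
        stk.pop_back();                              // ...and pop (handles recursion)
        return addr;
    }

    int main() {
        GBTStackMap stacks;
        GBTElem start;                               // stand-in GBT entry
        pushStartParamAddr(stacks, &start, 0x1000);  // hypothetical effective address
        return popStartParamAddr(stacks, &start) == 0x1000 ? 0 : 1;
    }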
Index: llvm/lib/Reoptimizer/Inst/lib/design.txt
diff -u llvm/lib/Reoptimizer/Inst/lib/design.txt:1.13 llvm/lib/Reoptimizer/Inst/lib/design.txt:1.14
--- llvm/lib/Reoptimizer/Inst/lib/design.txt:1.13 Sun May 4 16:16:17 2003
+++ llvm/lib/Reoptimizer/Inst/lib/design.txt Mon May 12 21:00:23 2003
@@ -880,7 +880,6 @@
{{{ MILESTONES
-- Perform the "tracecache experiment" described in the TODO section.
}}}
@@ -890,8 +889,77 @@
(do not need variable-sized spill region except for phase5 invocations)
- Start table-of-stacks implementation for phase4 authorship of phase 5 slots.
+ - Placed on hold temporarily because of the "alloca-finding" approach. However, see the
+ following e-mail for the current state of things:
- - Write phase 5 slot generation code, phase 5 function itself, etc.
+ {{{ E-mail regarding alloca-finding and table-of-stacks approach
+Okay, this is starting to seem intractable. I have another problem that
+I don't think can be resolved without resorting to a custom-stack
+mechanism that will incur prohibitive overhead.
+
+Everything is working for start-region instrumentation sites. For
+end-region instrumentation sites, however, there's a problem. In order
+to write the slot for end sites, I have to know (or know how to compute)
+the address of the return value of the corresponding start site. I had
+originally thought that I would just store this in the GBT, or
+"something", but I clearly didn't think through the problem well enough.
+
+There are only two ways I can think of that this can occur:
+
+(a) Write the effective address of the return value of the start inst
+func, so that it gets passed to the end inst func.
+
+or
+
+(b) Somehow encode the stack offset to the return value from the start
+inst, where the offset is from the %sp *at the end-region site*
+
+Both of these have problems.
+
+First, I don't think (b) can work at all, given that there may be
+alloca's present in the original application that would change the %sp,
+and thus the offset value that we'd need, and we can't determine the
+exact allocas that are executed statically.
+
+For (a), the effective address isn't known until runtime. We can store
+this address in some global table where the phase 4 invocation for the
+end site can find it, but it is not sufficient to have a single scalar
+address here -- we must have a stack, due to potential recursive
+invocations. I think that this is clear, please let me know if I'm not
+making sense. :)
+
+Hence, we'd need to maintain a stack of effective addresses, which was
+pushed during the execution of phase 5 for the start site, and then read
+and popped during the execution of phase 5 for the end site. We're
+already really bloated with how many instructions we've got going on for
+all of the spills, etc, and I'm concerned about the effect that this
+stack manipulation will have on our overhead, as we talked about before.
+
+The way I see it, we only have two options if we're to make forward
+progress and not obliterate our chances of having lower overhead
+numbers. Hopefully we have some better choices. In the interests of
+short-term forward progress, I'm going to go with #1 for now.
+
+#1 - Make another common-case assumption that there will be no allocas
+between start and end sites, on *any* control path. If this is the case,
+then we know that the stack pointer will not have been manipulated (I
+think) between the start and end sites, and so the %sp offsets to the
+requisite data will be unchanged since when the phase 5 step occurred
+for the start site.
+
+#2 - Just implement our fall-back solution that everything seems to be
+pointing to. I'm not sure exactly what other logistic nightmares might
+be entailed in this, though, because I've only a sketch of the idea.
+
+I wanted to point out, also, that the so-called "fall back" approach we
+discussed previously also involves manipulation of a stack at runtime
+(push/pop actions still have to occur at runtime), so perhaps the stack
+of effective addresses is less prohibitive than I thought, if only in
+the sense that we cannot avoid it. :(
+ }}}
+
+ - Write phase 5 stuff for end-region sites -- will assume that no allocas lie between
+ the start and end sites, which is not a particularly fair assumption.
- Optimizations:
- No need to save registers (other than those clobbered) in phase 3 slot, since phase 3
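A small arithmetic sketch of the offset relationship that option #1 above relies on: if no alloca runs between the start and end sites, the end-site slot can reach the start site's parameter memory at a fixed %sp-relative offset (the commented-out generateSPOffset(REG_2, offset + 2 * stkSize - gbte->paramSize) call in buildEndIntervalSlot). The concrete numbers are assumptions for illustration.

    // Option #1: with no intervening allocas, %sp is unchanged between the start
    // and end sites, so their slot regions stack directly on top of each other.
    #include <cstdio>

    int main() {
        unsigned offset    = 0;    // assumed findAllocaOffset() result at the end site
        unsigned stkSize   = 160;  // assumed size of one slot's alloca'd region
        unsigned paramSize = 16;   // assumed gbte->paramSize (same for start and end)

        unsigned endParam   = offset + stkSize - paramSize;      // this site's params
        unsigned startParam = offset + 2 * stkSize - paramSize;  // start site's params,
                                                                 // one region higher
        std::printf("end params at sp+BIAS+%u, start params at sp+BIAS+%u\n",
                    endParam, startParam);
        return 0;
    }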