Index: test/Makefile =================================================================== --- test/Makefile (revision 88877) +++ test/Makefile (working copy) @@ -77,12 +77,12 @@ endif # Both AuroraUX & Solaris do not have the -m flag for ulimit ifeq ($(HOST_OS),SunOS) -ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ; +ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; else # !SunOS ifeq ($(HOST_OS),AuroraUX) -ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ; +ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; else # !AuroraUX -ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 512000 ; +ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; endif # AuroraUX endif # SunOS Index: utils/lit/TestFormats.py =================================================================== --- utils/lit/TestFormats.py (revision 88877) +++ utils/lit/TestFormats.py (working copy) @@ -53,8 +53,9 @@ class GoogleTest(object): def execute(self, test, litConfig): testPath,testName = os.path.split(test.getSourcePath()) - if not os.path.exists(testPath): - # Handle GTest typed tests, whose name includes a '/'. + while not os.path.exists(testPath): + # Handle GTest parametrized and typed tests, whose name includes + # some '/'s. testPath, namePrefix = os.path.split(testPath) testName = os.path.join(namePrefix, testName) Index: unittests/ExecutionEngine/JIT/JITTest.cpp =================================================================== --- unittests/ExecutionEngine/JIT/JITTest.cpp (revision 88877) +++ unittests/ExecutionEngine/JIT/JITTest.cpp (working copy) @@ -26,10 +26,22 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TypeBuilder.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelect.h" #include "llvm/Type.h" #include +#include + +#if HAVE_ERRNO_H +#include +#endif +#if HAVE_UNISTD_H +#include +#endif +#if _POSIX_MAPPED_FILES > 0 +#include +#endif using namespace llvm; @@ -177,6 +189,15 @@ public: } }; +void LoadAssemblyInto(Module *M, const char *assembly) { + SMDiagnostic Error; + bool success = NULL != ParseAssemblyString(assembly, M, Error, M->getContext()); + std::string errMsg; + raw_string_ostream os(errMsg); + Error.Print("", os); + ASSERT_TRUE(success) << os.str(); +} + class JITTest : public testing::Test { protected: virtual void SetUp() { @@ -191,12 +212,7 @@ class JITTest : public testing::Test { } void LoadAssembly(const char *assembly) { - SMDiagnostic Error; - bool success = NULL != ParseAssemblyString(assembly, M, Error, Context); - std::string errMsg; - raw_string_ostream os(errMsg); - Error.Print("", os); - ASSERT_TRUE(success) << os.str(); + LoadAssemblyInto(M, assembly); } LLVMContext Context; @@ -498,6 +514,135 @@ TEST_F(JITTest, NoStubs) { } #endif +#if _POSIX_MAPPED_FILES > 0 && (defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)) +class FarCallMemMgr : public RecordingJITMemoryManager { + void *MmapRegion; + size_t MmapSize; + uint8_t *NextStub; + uint8_t *NextFunction; + + public: + FarCallMemMgr() + : MmapSize(16ULL << 30) { // 16GB + MmapRegion = mmap(NULL, MmapSize, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (MmapRegion == MAP_FAILED) { + ADD_FAILURE() << "mmap failed: " << strerror(errno); + } + // Set up the 16GB mapped region in several chunks: + // Stubs / ~5GB empty space / Function 1 / ~5GB empty space / Function 2 + // This way no two entities can use a 32-bit relative call to reach each other. + NextStub = static_cast(MmapRegion); + NextFunction = NextStub + (5ULL << 30); + + // Next, poison some of the memory so a wild call will eventually crash, + // even if memory was initialized by the OS to 0. We can't poison all of + // the memory because we want to be able to run on systems with less than + // 16GB of physical ram. + int TrapInstr = 0xCC; // INT 3 + memset(NextStub, TrapInstr, 1<<10); + for (size_t Offset = 1<<30; Offset < MmapSize; Offset += 1<<30) { + // Fill the 2KB around each GB boundary with trap instructions. This + // should ensure that we can't run into emitted functions without hitting + // the trap. + memset(NextStub + Offset - (1<<10), TrapInstr, 2<<10); + } + } + + ~FarCallMemMgr() { + EXPECT_EQ(0, munmap(MmapRegion, MmapSize)); + } + + virtual void setMemoryWritable() {} + virtual void setMemoryExecutable() {} + virtual uint8_t *startFunctionBody(const Function *F, + uintptr_t &ActualSize) { + ActualSize = 1 << 30; + uint8_t *Result = NextFunction; + NextFunction += 5ULL << 30; + return Result; + } + virtual void endFunctionBody(const Function*, uint8_t*, uint8_t*) {} + virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, + unsigned Alignment) { + NextStub = reinterpret_cast( + uintptr_t(NextStub + Alignment - 1) &~ uintptr_t(Alignment - 1)); + uint8_t *Result = NextStub; + NextStub += StubSize; + return Result; + } +}; + +class FarTargetTest : public ::testing::TestWithParam { + protected: + FarTargetTest() : SavedCodeModel(TargetMachine::getCodeModel()) {} + ~FarTargetTest() { + TargetMachine::setCodeModel(SavedCodeModel); + } + + const CodeModel::Model SavedCodeModel; +}; +INSTANTIATE_TEST_CASE_P(CodeGenOpt, + FarTargetTest, + ::testing::Values(CodeGenOpt::None, + CodeGenOpt::Default)); + +TEST_P(FarTargetTest, CallToFarTarget) { + // x86-64 can only make direct calls to functions within 32 bits of + // the current PC. To call anything farther away, we have to load + // the address into a register and call through the register. The + // old JIT did this by allocating a stub for any far call. However, + // that stub needed to be within 32 bits of the callsite. Here we + // test that the JIT correctly deals with stubs and calls more than + // 32 bits away from the callsite. + + // Make sure the code generator is assuming code might be far away. + //TargetMachine::setCodeModel(CodeModel::Large); + + LLVMContext Context; + Module *M = new Module("
", Context); + ExistingModuleProvider *MP = new ExistingModuleProvider(M); + + JITMemoryManager *MemMgr = new FarCallMemMgr(); + std::string Error; + OwningPtr JIT(EngineBuilder(MP) + .setEngineKind(EngineKind::JIT) + .setErrorStr(&Error) + .setJITMemoryManager(MemMgr) + .setOptLevel(GetParam()) + .create()); + ASSERT_EQ(Error, ""); + TargetMachine::setCodeModel(CodeModel::Large); + + LoadAssemblyInto(M, + "define i32 @test() { " + " ret i32 7 " + "} " + " " + "define i32 @test_far() { " + " %result = call i32 @test() " + " ret i32 %result " + "} "); + // First, lay out a function early in memory. + Function *TestFunction = M->getFunction("test"); + int32_t (*TestFunctionPtr)() = reinterpret_cast( + (intptr_t)JIT->getPointerToFunction(TestFunction)); + ASSERT_EQ(7, TestFunctionPtr()); + + // We now lay out the far-away function. This should land >4GB away from test(). + Function *FarFunction = M->getFunction("test_far"); + int32_t (*FarFunctionPtr)() = reinterpret_cast( + (intptr_t)JIT->getPointerToFunction(FarFunction)); + + EXPECT_LT(1LL << 32, llabs(intptr_t(FarFunctionPtr) - intptr_t(TestFunctionPtr))) + << "Functions must be >32 bits apart or the test is meaningless."; + + // This used to result in a segfault in FarFunction, when its call instruction + // jumped to the wrong address. + EXPECT_EQ(7, FarFunctionPtr()); +} +#endif // Platform has far-call problem. + // This code is copied from JITEventListenerTest, but it only runs once for all // the tests in this directory. Everything seems fine, but that's strange // behavior. Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp (revision 88877) +++ lib/Target/X86/X86ISelLowering.cpp (working copy) @@ -1937,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Cha FPDiff, dl); } - // If the callee is a GlobalAddress node (quite common, every direct call is) - // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + bool WasGlobalOrExternal = false; + if (getTargetMachine().getCodeModel() == CodeModel::Large) { + assert(Is64Bit && "Large code model is only legal in 64-bit mode."); + // In the 64-bit large code model, we have to make all calls + // through a register, since the call instruction's 32-bit + // pc-relative offset may not be large enough to hold the whole + // address. + } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + WasGlobalOrExternal = true; + // If the callee is a GlobalAddress node (quite common, every direct call + // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack + // it. + // We should use extra load for direct calls to dllimported functions in // non-JIT mode. GlobalValue *GV = G->getGlobal(); @@ -1967,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Cha G->getOffset(), OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + WasGlobalOrExternal = true; unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external @@ -1984,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Cha Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlags); - } else if (isTailCall) { + } + + if (isTailCall && !WasGlobalOrExternal) { unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, Index: lib/Target/X86/X86CodeEmitter.cpp =================================================================== --- lib/Target/X86/X86CodeEmitter.cpp (revision 88877) +++ lib/Target/X86/X86CodeEmitter.cpp (working copy) @@ -82,7 +82,7 @@ namespace { void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0, - bool MayNeedFarStub = false, bool Indirect = false); + bool Indirect = false); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0); @@ -176,7 +176,6 @@ template void Emitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp /* = 0 */, intptr_t PCAdj /* = 0 */, - bool MayNeedFarStub /* = false */, bool Indirect /* = false */) { intptr_t RelocCST = Disp; if (Reloc == X86::reloc_picrel_word) @@ -185,9 +184,9 @@ void Emitter::emitGlobalAdd RelocCST = PCAdj; MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, MayNeedFarStub) + GV, RelocCST, false) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, MayNeedFarStub); + GV, RelocCST, false); MCE.addRelocation(MR); // The relocated value will be added to the displacement if (Reloc == X86::reloc_absolute_dword) @@ -333,10 +332,9 @@ void Emitter::emitDisplacem // do it, otherwise fallback to absolute (this is determined by IsPCRel). // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute - bool MayNeedFarStub = isa(RelocOp->getGlobal()); bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(), - Adj, MayNeedFarStub, Indirect); + Adj, Indirect); } else if (RelocOp->isSymbol()) { emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType); } else if (RelocOp->isCPI()) { @@ -633,14 +631,8 @@ void Emitter::emitInstructi } if (MO.isGlobal()) { - // Assume undefined functions may be outside the Small codespace. - bool MayNeedFarStub = - (Is64BitMode && - (TM.getCodeModel() == CodeModel::Large || - TM.getSubtarget().isTargetDarwin())) || - Opcode == X86::TAILJMPd; emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, - MO.getOffset(), 0, MayNeedFarStub); + MO.getOffset(), 0); break; } @@ -681,10 +673,9 @@ void Emitter::emitInstructi if (Opcode == X86::MOV64ri) rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? if (MO1.isGlobal()) { - bool MayNeedFarStub = isa(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - MayNeedFarStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -790,10 +781,9 @@ void Emitter::emitInstructi if (Opcode == X86::MOV64ri32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? if (MO1.isGlobal()) { - bool MayNeedFarStub = isa(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - MayNeedFarStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -831,10 +821,9 @@ void Emitter::emitInstructi if (Opcode == X86::MOV64mi32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? if (MO.isGlobal()) { - bool MayNeedFarStub = isa(MO.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO, TM); emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - MayNeedFarStub, Indirect); + Indirect); } else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), rt); else if (MO.isCPI()) Index: lib/Target/X86/X86JITInfo.cpp =================================================================== --- lib/Target/X86/X86JITInfo.cpp (revision 88877) +++ lib/Target/X86/X86JITInfo.cpp (working copy) @@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackP // Rewrite the call target... so that we don't end up here every time we // execute the call. #if defined (X86_64_JIT) - if (!isStub) - *(intptr_t *)(RetAddr - 0xa) = NewVal; + assert(isStub && + "X86-64 doesn't support rewriting non-stub lazy compilation calls:" + " the call instruction varies too much."); #else *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4); #endif Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp (revision 88877) +++ lib/Target/X86/X86TargetMachine.cpp (working copy) @@ -185,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(Pa } // 64-bit JIT places everything in the same buffer except external functions. - // On Darwin, use small code model but hack the call instruction for - // externals. Elsewhere, do not assume globals are in the lower 4G. - if (Subtarget.is64Bit()) { - if (Subtarget.isTargetDarwin()) - setCodeModel(CodeModel::Small); - else + if (Subtarget.is64Bit()) setCodeModel(CodeModel::Large); - } PM.add(createX86CodeEmitterPass(*this, MCE)); @@ -211,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(Pa } // 64-bit JIT places everything in the same buffer except external functions. - // On Darwin, use small code model but hack the call instruction for - // externals. Elsewhere, do not assume globals are in the lower 4G. - if (Subtarget.is64Bit()) { - if (Subtarget.isTargetDarwin()) - setCodeModel(CodeModel::Small); - else + if (Subtarget.is64Bit()) setCodeModel(CodeModel::Large); - } PM.add(createX86JITCodeEmitterPass(*this, JCE)); Index: lib/ExecutionEngine/JIT/JITEmitter.cpp =================================================================== --- lib/ExecutionEngine/JIT/JITEmitter.cpp (revision 88877) +++ lib/ExecutionEngine/JIT/JITEmitter.cpp (working copy) @@ -247,16 +247,6 @@ namespace { /// specified GV address. void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress); - /// AddCallbackAtLocation - If the target is capable of rewriting an - /// instruction without the use of a stub, record the location of the use so - /// we know which function is being used at the location. - void *AddCallbackAtLocation(Function *F, void *Location) { - MutexGuard locked(TheJIT->lock); - /// Get the target-specific JIT resolver function. - state.AddCallSite(locked, Location, F); - return (void*)(intptr_t)LazyResolverFn; - } - void getRelocatableGVs(SmallVectorImpl &GVs, SmallVectorImpl &Ptrs); @@ -756,13 +746,6 @@ void *JITEmitter::getPointerToGlobal(Glo !MayNeedFarStub) return TheJIT->getPointerToFunction(F); - // Okay, the function has not been compiled yet, if the target callback - // mechanism is capable of rewriting the instruction directly, prefer to do - // that instead of emitting a stub. This uses the lazy resolver, so is not - // legal if lazy compilation is disabled. - if (!MayNeedFarStub && TheJIT->isCompilingLazily()) - return Resolver.AddCallbackAtLocation(F, Reference); - // Otherwise, we have to emit a stub. void *StubAddr = Resolver.getFunctionStub(F);