[llvm] r293000 - AMDGPU add support for spilling to a user sgpr pointed buffers
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 25 09:12:51 PST 2017
Yes, go ahead (or let me know if you'd like me to do the merge).
Thanks,
Hans
On Tue, Jan 24, 2017 at 5:40 PM, Tom Stellard <tom at stellard.net> wrote:
> Hi Hans,
>
> Is this OK to merge into 4.0? It's required in order to be able to
> spill registers with the radv vulkan implementation.
>
> -Tom
>
> On Wed, Jan 25, 2017 at 01:25:14AM -0000, Tom Stellard via llvm-commits wrote:
>> Author: tstellar
>> Date: Tue Jan 24 19:25:13 2017
>> New Revision: 293000
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=293000&view=rev
>> Log:
>> AMDGPU add support for spilling to a user sgpr pointed buffers
>>
>> Summary:
>> This lets you select which sort of spilling you want: either the buffer address is taken directly from s[0:1], or it is fetched with a 64-bit load through the pointer in s[0:1].
>>
>> Patch By: Dave Airlie
>>
>> Reviewers: nhaehnle, arsenm, tstellarAMD
>>
>> Reviewed By: arsenm
>>
>> Subscribers: mareko, llvm-commits, kzhuravl, wdng, yaxunl, tony-tye
>>
>> Differential Revision: https://reviews.llvm.org/D25428
>>
>> Modified:
>> llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
>> llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
>> llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
>> llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
>> llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
>> llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
>> llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
>> llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
>> llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
>> llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
>>
>> Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
>> +++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Tue Jan 24 19:25:13 2017
>> @@ -100,6 +100,10 @@ def int_amdgcn_dispatch_id :
>> GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
>> Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
>>
>> +def int_amdgcn_implicit_buffer_ptr :
>> + GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
>> + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
>> +
>> //===----------------------------------------------------------------------===//
>> // Instruction Intrinsics
>> //===----------------------------------------------------------------------===//
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Tue Jan 24 19:25:13 2017
>> @@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReacha
>> void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
>> const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
>> SIProgramInfo KernelInfo;
>> - if (STM.isAmdCodeObjectV2()) {
>> + if (STM.isAmdCodeObjectV2(*MF)) {
>> getSIProgramInfo(KernelInfo, *MF);
>> EmitAmdKernelCodeT(*MF, KernelInfo);
>> }
>> @@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyS
>> void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
>> const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
>> const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
>> - if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
>> + if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
>> AMDGPUTargetStreamer *TS =
>> static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
>> SmallString<128> SymbolName;
>> @@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCode
>>
>> // FIXME: Should use getKernArgSize
>> header.kernarg_segment_byte_size =
>> - STM.getKernArgSegmentSize(MFI->getABIArgOffset());
>> + STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
>> header.wavefront_sgpr_count = KernelInfo.NumSGPR;
>> header.workitem_vgpr_count = KernelInfo.NumVGPR;
>> header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Tue Jan 24 19:25:13 2017
>> @@ -299,8 +299,9 @@ bool SISubtarget::isVGPRSpillingEnabled(
>> return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
>> }
>>
>> -unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
>> - unsigned ImplicitBytes = getImplicitArgNumBytes();
>> +unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
>> + unsigned ExplicitArgBytes) const {
>> + unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
>> if (ImplicitBytes == 0)
>> return ExplicitArgBytes;
>>
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Tue Jan 24 19:25:13 2017
>> @@ -313,22 +313,31 @@ public:
>> return EnableXNACK;
>> }
>>
>> - bool isAmdCodeObjectV2() const {
>> - return isAmdHsaOS() || isMesa3DOS();
>> + bool isMesaKernel(const MachineFunction &MF) const {
>> + return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
>> + }
>> +
>> + // Covers VS/PS/CS graphics shaders
>> + bool isMesaGfxShader(const MachineFunction &MF) const {
>> + return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
>> + }
>> +
>> + bool isAmdCodeObjectV2(const MachineFunction &MF) const {
>> + return isAmdHsaOS() || isMesaKernel(MF);
>> }
>>
>> /// \brief Returns the offset in bytes from the start of the input buffer
>> /// of the first explicit kernel argument.
>> - unsigned getExplicitKernelArgOffset() const {
>> - return isAmdCodeObjectV2() ? 0 : 36;
>> + unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
>> + return isAmdCodeObjectV2(MF) ? 0 : 36;
>> }
>>
>> unsigned getAlignmentForImplicitArgPtr() const {
>> return isAmdHsaOS() ? 8 : 4;
>> }
>>
>> - unsigned getImplicitArgNumBytes() const {
>> - if (isMesa3DOS())
>> + unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
>> + if (isMesaKernel(MF))
>> return 16;
>> if (isAmdHsaOS() && isOpenCLEnv())
>> return 32;
>> @@ -595,7 +604,7 @@ public:
>> return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
>> }
>>
>> - unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
>> + unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
>>
>> /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
>> unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Tue Jan 24 19:25:13 2017
>> @@ -1587,7 +1587,7 @@ SDValue R600TargetLowering::LowerFormalA
>>
>> unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
>> unsigned PartOffset = VA.getLocMemOffset();
>> - unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
>> + unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
>>
>> MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
>> SDValue Arg = DAG.getLoad(
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Tue Jan 24 19:25:13 2017
>> @@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(Machi
>>
>>
>> unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
>> - if (ST.isAmdCodeObjectV2()) {
>> + if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
>> PreloadedPrivateBufferReg = TRI->getPreloadedValue(
>> MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
>> }
>> @@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(Machi
>> }
>>
>> if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
>> - assert(ST.isAmdCodeObjectV2());
>> + assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
>> MRI.addLiveIn(PreloadedPrivateBufferReg);
>> MBB.addLiveIn(PreloadedPrivateBufferReg);
>> }
>> @@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(Machi
>>
>> bool CopyBuffer = ResourceRegUsed &&
>> PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
>> + ST.isAmdCodeObjectV2(MF) &&
>> ScratchRsrcReg != PreloadedPrivateBufferReg;
>>
>> // This needs to be careful of the copying order to avoid overwriting one of
>> @@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(Machi
>> .addReg(PreloadedPrivateBufferReg, RegState::Kill);
>> }
>>
>> - if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) {
>> - assert(!ST.isAmdCodeObjectV2());
>> + if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
>> + assert(!ST.isAmdCodeObjectV2(MF));
>> const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
>>
>> - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
>> - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
>> unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
>> unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
>>
>> // Use relocations to get the pointer, and setup the other bits manually.
>> uint64_t Rsrc23 = TII->getScratchRsrcWords23();
>> - BuildMI(MBB, I, DL, SMovB32, Rsrc0)
>> - .addExternalSymbol("SCRATCH_RSRC_DWORD0")
>> - .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
>> -
>> - BuildMI(MBB, I, DL, SMovB32, Rsrc1)
>> - .addExternalSymbol("SCRATCH_RSRC_DWORD1")
>> - .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
>> +
>> + if (MFI->hasPrivateMemoryInputPtr()) {
>> + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
>> +
>> + if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
>> + const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
>> +
>> + BuildMI(MBB, I, DL, Mov64, Rsrc01)
>> + .addReg(PreloadedPrivateBufferReg)
>> + .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
>> + } else {
>> + const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
>> +
>> + PointerType *PtrTy =
>> + PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
>> + AMDGPUAS::CONSTANT_ADDRESS);
>> + MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
>> + auto MMO = MF.getMachineMemOperand(PtrInfo,
>> + MachineMemOperand::MOLoad |
>> + MachineMemOperand::MOInvariant |
>> + MachineMemOperand::MODereferenceable,
>> + 0, 0);
>> + BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
>> + .addReg(PreloadedPrivateBufferReg)
>> + .addImm(0) // offset
>> + .addImm(0) // glc
>> + .addMemOperand(MMO)
>> + .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
>> + }
>> + } else {
>> + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
>> + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
>> +
>> + BuildMI(MBB, I, DL, SMovB32, Rsrc0)
>> + .addExternalSymbol("SCRATCH_RSRC_DWORD0")
>> + .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
>> +
>> + BuildMI(MBB, I, DL, SMovB32, Rsrc1)
>> + .addExternalSymbol("SCRATCH_RSRC_DWORD1")
>> + .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
>> +
>> + }
>>
>> BuildMI(MBB, I, DL, SMovB32, Rsrc2)
>> .addImm(Rsrc23 & 0xffffffff)
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Jan 24 19:25:13 2017
>> @@ -891,7 +891,7 @@ SDValue SITargetLowering::LowerFormalArg
>> if (!AMDGPU::isShader(CallConv)) {
>> assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
>> } else {
>> - assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
>> + assert(!Info->hasDispatchPtr() &&
>> !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
>> !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
>> !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
>> @@ -899,6 +899,12 @@ SDValue SITargetLowering::LowerFormalArg
>> !Info->hasWorkItemIDZ());
>> }
>>
>> + if (Info->hasPrivateMemoryInputPtr()) {
>> + unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
>> + MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
>> + CCInfo.AllocateReg(PrivateMemoryPtrReg);
>> + }
>> +
>> // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
>> if (Info->hasPrivateSegmentBuffer()) {
>> unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
>> @@ -956,7 +962,7 @@ SDValue SITargetLowering::LowerFormalArg
>> if (VA.isMemLoc()) {
>> VT = Ins[i].VT;
>> EVT MemVT = VA.getLocVT();
>> - const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
>> + const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
>> VA.getLocMemOffset();
>> // The first 36 bytes of the input buffer contains information about
>> // thread group and global sizes.
>> @@ -1080,7 +1086,7 @@ SDValue SITargetLowering::LowerFormalArg
>> if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
>> HasStackObjects = true;
>>
>> - if (ST.isAmdCodeObjectV2()) {
>> + if (ST.isAmdCodeObjectV2(MF)) {
>> if (HasStackObjects) {
>> // If we have stack objects, we unquestionably need the private buffer
>> // resource. For the Code Object V2 ABI, this will be the first 4 user
>> @@ -2504,9 +2510,13 @@ SDValue SITargetLowering::LowerINTRINSIC
>> // TODO: Should this propagate fast-math-flags?
>>
>> switch (IntrinsicID) {
>> + case Intrinsic::amdgcn_implicit_buffer_ptr: {
>> + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
>> + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
>> + }
>> case Intrinsic::amdgcn_dispatch_ptr:
>> case Intrinsic::amdgcn_queue_ptr: {
>> - if (!Subtarget->isAmdCodeObjectV2()) {
>> + if (!Subtarget->isAmdCodeObjectV2(MF)) {
>> DiagnosticInfoUnsupported BadIntrin(
>> *MF.getFunction(), "unsupported hsa intrinsic without hsa target",
>> DL.getDebugLoc());
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Tue Jan 24 19:25:13 2017
>> @@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunction
>> PrivateSegmentWaveByteOffset(false),
>> WorkItemIDX(false),
>> WorkItemIDY(false),
>> - WorkItemIDZ(false) {
>> + WorkItemIDZ(false),
>> + PrivateMemoryInputPtr(false) {
>> const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
>> const Function *F = MF.getFunction();
>>
>> @@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunction
>> if (HasStackObjects || MaySpill)
>> PrivateSegmentWaveByteOffset = true;
>>
>> - if (ST.isAmdCodeObjectV2()) {
>> + if (ST.isAmdCodeObjectV2(MF)) {
>> if (HasStackObjects || MaySpill)
>> PrivateSegmentBuffer = true;
>>
>> @@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunction
>>
>> if (F->hasFnAttribute("amdgpu-dispatch-id"))
>> DispatchID = true;
>> + } else if (ST.isMesaGfxShader(MF)) {
>> + if (HasStackObjects || MaySpill)
>> + PrivateMemoryInputPtr = true;
>> }
>>
>> // We don't need to worry about accessing spills with flat instructions.
>> @@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatS
>> return FlatScratchInitUserSGPR;
>> }
>>
>> +unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
>> + PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
>> + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
>> + NumUserSGPRs += 2;
>> + return PrivateMemoryPtrUserSGPR;
>> +}
>> +
>> SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
>> MachineFunction *MF,
>> unsigned FrameIndex,
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Tue Jan 24 19:25:13 2017
>> @@ -87,6 +87,9 @@ class SIMachineFunctionInfo final : publ
>> unsigned ScratchRSrcReg;
>> unsigned ScratchWaveOffsetReg;
>>
>> + // Input registers for non-HSA ABI
>> + unsigned PrivateMemoryPtrUserSGPR;
>> +
>> // Input registers setup for the HSA ABI.
>> // User SGPRs in allocation order.
>> unsigned PrivateSegmentBufferUserSGPR;
>> @@ -166,6 +169,11 @@ private:
>> bool WorkItemIDY : 1;
>> bool WorkItemIDZ : 1;
>>
>> + // Private memory buffer
>> + // Compute directly in sgpr[0:1]
>> + // Other shaders indirect 64-bits at sgpr[0:1]
>> + bool PrivateMemoryInputPtr : 1;
>> +
>> MCPhysReg getNextUserSGPR() const {
>> assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
>> return AMDGPU::SGPR0 + NumUserSGPRs;
>> @@ -204,6 +212,7 @@ public:
>> unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
>> unsigned addDispatchID(const SIRegisterInfo &TRI);
>> unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
>> + unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
>>
>> // Add system SGPRs.
>> unsigned addWorkGroupIDX() {
>> @@ -308,6 +317,10 @@ public:
>> return WorkItemIDZ;
>> }
>>
>> + bool hasPrivateMemoryInputPtr() const {
>> + return PrivateMemoryInputPtr;
>> + }
>> +
>> unsigned getNumUserSGPRs() const {
>> return NumUserSGPRs;
>> }
>> @@ -344,6 +357,10 @@ public:
>> return QueuePtrUserSGPR;
>> }
>>
>> + unsigned getPrivateMemoryPtrUserSGPR() const {
>> + return PrivateMemoryPtrUserSGPR;
>> + }
>> +
>> bool hasSpilledSGPRs() const {
>> return HasSpilledSGPRs;
>> }
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=293000&r1=292999&r2=293000&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Tue Jan 24 19:25:13 2017
>> @@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedVal
>> case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
>> return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
>> case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
>> - assert(ST.isAmdCodeObjectV2() &&
>> - "Non-CodeObjectV2 ABI currently uses relocations");
>> - assert(MFI->hasPrivateSegmentBuffer());
>> - return MFI->PrivateSegmentBufferUserSGPR;
>> + if (ST.isAmdCodeObjectV2(MF)) {
>> + assert(MFI->hasPrivateSegmentBuffer());
>> + return MFI->PrivateSegmentBufferUserSGPR;
>> + }
>> + assert(MFI->hasPrivateMemoryInputPtr());
>> + return MFI->PrivateMemoryPtrUserSGPR;
>> case SIRegisterInfo::KERNARG_SEGMENT_PTR:
>> assert(MFI->hasKernargSegmentPtr());
>> return MFI->KernargSegmentPtrUserSGPR;
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list