[llvm] r273769 - [AMDGPU] Emit debugger prologue and emit the rest of the debugger fields in the kernel code header
Konstantin Zhuravlyov via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 24 20:11:28 PDT 2016
Author: kzhuravl
Date: Fri Jun 24 22:11:28 2016
New Revision: 273769
URL: http://llvm.org/viewvc/llvm-project?rev=273769&view=rev
Log:
[AMDGPU] Emit debugger prologue and emit the rest of the debugger fields in the kernel code header
Debugger prologue is emitted if -mattr=+amdgpu-debugger-emit-prologue is specified.
Debugger prologue writes work group IDs and work item IDs to scratch memory at a fixed location in the following format:
- offset 0: work group ID x
- offset 4: work group ID y
- offset 8: work group ID z
- offset 16: work item ID x
- offset 20: work item ID y
- offset 24: work item ID z
Set
- amd_kernel_code_t::debug_wavefront_private_segment_offset_sgpr to scratch wave offset reg
- amd_kernel_code_t::debug_private_segment_buffer_sgpr to scratch rsrc reg
- amd_kernel_code_t::is_debug_supported to true if all debugger features are enabled
Differential Revision: http://reviews.llvm.org/D20335
Added:
llvm/trunk/test/CodeGen/AMDGPU/debugger-emit-prologue.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Fri Jun 24 22:11:28 2016
@@ -329,6 +329,13 @@ def FeatureDebuggerReserveRegs : Subtarg
"Reserve registers for debugger usage"
>;
+def FeatureDebuggerEmitPrologue : SubtargetFeature<
+ "amdgpu-debugger-emit-prologue",
+ "DebuggerEmitPrologue",
+ "true",
+ "Emit debugger prologue"
+>;
+
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Fri Jun 24 22:11:28 2016
@@ -200,6 +200,13 @@ bool AMDGPUAsmPrinter::runOnMachineFunct
OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
false);
+ if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
+ OutStreamer->emitRawComment(" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
+ Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
+ OutStreamer->emitRawComment(" DebuggerPrivateSegmentBufferSGPR: s" +
+ Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR), false);
+ }
+
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
false);
@@ -444,6 +451,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
MaxVGPR += MFI->getDebuggerReservedVGPRCount();
}
+ // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
+ // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
+ // attribute was specified.
+ if (STM.debuggerEmitPrologue()) {
+ ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
+ RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
+ ProgInfo.DebuggerPrivateSegmentBufferSGPR =
+ RI->getHWRegIndex(MFI->getScratchRSrcReg());
+ }
+
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
ProgInfo.NumVGPR = MaxVGPR + 1;
@@ -670,6 +687,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCode
if (MFI->hasDispatchPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ if (STM.debuggerSupported())
+ header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
+
if (STM.isXNACKEnabled())
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
@@ -681,6 +701,13 @@ void AMDGPUAsmPrinter::EmitAmdKernelCode
header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
+ if (STM.debuggerEmitPrologue()) {
+ header.debug_wavefront_private_segment_offset_sgpr =
+ KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
+ header.debug_private_segment_buffer_sgpr =
+ KernelInfo.DebuggerPrivateSegmentBufferSGPR;
+ }
+
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h Fri Jun 24 22:11:28 2016
@@ -42,6 +42,8 @@ private:
FlatUsed(false),
ReservedVGPRFirst(0),
ReservedVGPRCount(0),
+ DebuggerWavefrontPrivateSegmentOffsetSGPR((uint16_t)-1),
+ DebuggerPrivateSegmentBufferSGPR((uint16_t)-1),
VCCUsed(false),
CodeLen(0) {}
@@ -75,6 +77,14 @@ private:
// The number of consecutive VGPRs reserved.
uint16_t ReservedVGPRCount;
+ // Fixed SGPR number used to hold wave scratch offset for entire kernel
+ // execution, or uint16_t(-1) if the register is not used or not known.
+ uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR;
+ // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
+ // kernel execution, or uint16_t(-1) if the register is not used or not
+ // known.
+ uint16_t DebuggerPrivateSegmentBufferSGPR;
+
// Bonus information for debugging.
bool VCCUsed;
uint64_t CodeLen;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Fri Jun 24 22:11:28 2016
@@ -101,6 +101,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
EnableXNACK(false),
DebuggerInsertNops(false),
DebuggerReserveRegs(false),
+ DebuggerEmitPrologue(false),
EnableVGPRSpilling(false),
EnablePromoteAlloca(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Fri Jun 24 22:11:28 2016
@@ -77,6 +77,7 @@ protected:
bool EnableXNACK;
bool DebuggerInsertNops;
bool DebuggerReserveRegs;
+ bool DebuggerEmitPrologue;
// Used as options.
bool EnableVGPRSpilling;
@@ -402,6 +403,11 @@ public:
return EnableSIScheduler;
}
+ bool debuggerSupported() const {
+ return debuggerInsertNops() && debuggerReserveRegs() &&
+ debuggerEmitPrologue();
+ }
+
bool debuggerInsertNops() const {
return DebuggerInsertNops;
}
@@ -410,6 +416,10 @@ public:
return DebuggerReserveRegs;
}
+ bool debuggerEmitPrologue() const {
+ return DebuggerEmitPrologue;
+ }
+
bool loadStoreOptEnabled() const {
return EnableLoadStoreOpt;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Fri Jun 24 22:11:28 2016
@@ -39,6 +39,12 @@ static ArrayRef<MCPhysReg> getAllSGPRs()
void SIFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
+ // specified.
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ if (ST.debuggerEmitPrologue())
+ emitDebuggerPrologue(MF, MBB);
+
if (!MF.getFrameInfo()->hasStackObjects())
return;
@@ -54,7 +60,6 @@ void SIFrameLowering::emitPrologue(Machi
if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
return;
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -87,6 +92,8 @@ void SIFrameLowering::emitPrologue(Machi
// pointer. Because we only detect if flat instructions are used at all,
// this will be used more often than necessary on VI.
+ // Debug location must be unknown since the first debug location is used to
+ // determine the end of the prologue.
DebugLoc DL;
unsigned FlatScratchInitReg
@@ -289,3 +296,44 @@ void SIFrameLowering::processFunctionBef
RS->addScavengingFrameIndex(ScavengeFI);
}
}
+
+void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ MachineBasicBlock::iterator I = MBB.begin();
+ DebugLoc DL;
+
+ // For each dimension:
+ for (unsigned i = 0; i < 3; ++i) {
+ // Get work group ID SGPR, and make it live-in again.
+ unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i);
+ MF.getRegInfo().addLiveIn(WorkGroupIDSGPR);
+ MBB.addLiveIn(WorkGroupIDSGPR);
+
+ // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in
+ // order to spill it to scratch.
+ unsigned WorkGroupIDVGPR =
+ MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR)
+ .addReg(WorkGroupIDSGPR);
+
+ // Spill work group ID.
+ int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i);
+ TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false,
+ WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
+
+ // Get work item ID VGPR, and make it live-in again.
+ unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i);
+ MF.getRegInfo().addLiveIn(WorkItemIDVGPR);
+ MBB.addLiveIn(WorkItemIDVGPR);
+
+ // Spill work item ID.
+ int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i);
+ TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false,
+ WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
+ }
+}
Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h Fri Jun 24 22:11:28 2016
@@ -29,6 +29,10 @@ public:
void processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
+
+private:
+ /// \brief Emits debugger prologue.
+ void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
};
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jun 24 22:11:28 2016
@@ -596,6 +596,11 @@ SDValue SITargetLowering::LowerFormalArg
return DAG.getEntryNode();
}
+ // Create stack objects that are used for emitting debugger prologue if
+ // "amdgpu-debugger-emit-prologue" attribute was specified.
+ if (ST.debuggerEmitPrologue())
+ createDebuggerPrologueStackObjects(MF);
+
SmallVector<ISD::InputArg, 16> Splits;
BitVector Skipped(Ins.size());
@@ -1258,6 +1263,32 @@ bool SITargetLowering::isCFIntrinsic(con
}
}
+void SITargetLowering::createDebuggerPrologueStackObjects(
+ MachineFunction &MF) const {
+ // Create stack objects that are used for emitting debugger prologue.
+ //
+ // Debugger prologue writes work group IDs and work item IDs to scratch memory
+ // at a fixed location in the following format:
+ // offset 0: work group ID x
+ // offset 4: work group ID y
+ // offset 8: work group ID z
+ // offset 16: work item ID x
+ // offset 20: work item ID y
+ // offset 24: work item ID z
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ int ObjectIdx = 0;
+
+ // For each dimension:
+ for (unsigned i = 0; i < 3; ++i) {
+ // Create fixed stack object for work group ID.
+ ObjectIdx = MF.getFrameInfo()->CreateFixedObject(4, i * 4, true);
+ Info->setDebuggerWorkGroupIDStackObjectIndex(i, ObjectIdx);
+ // Create fixed stack object for work item ID.
+ ObjectIdx = MF.getFrameInfo()->CreateFixedObject(4, i * 4 + 16, true);
+ Info->setDebuggerWorkItemIDStackObjectIndex(i, ObjectIdx);
+ }
+}
+
/// This transforms the control flow intrinsics to get the branch destination as
/// last parameter, also switches branch target with BR if the need arise
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Fri Jun 24 22:11:28 2016
@@ -70,6 +70,8 @@ class SITargetLowering final : public AM
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
bool isCFIntrinsic(const SDNode *Intr) const;
+
+ void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
public:
SITargetLowering(const TargetMachine &tm, const SISubtarget &STI);
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Fri Jun 24 22:11:28 2016
@@ -54,6 +54,8 @@ SIMachineFunctionInfo::SIMachineFunction
ReturnsVoid(true),
MaximumWorkGroupSize(0),
DebuggerReservedVGPRCount(0),
+ DebuggerWorkGroupIDStackObjectIndices{0, 0, 0},
+ DebuggerWorkItemIDStackObjectIndices{0, 0, 0},
LDSWaveSpillSize(0),
PSInputEna(0),
NumUserSGPRs(0),
@@ -92,16 +94,16 @@ SIMachineFunctionInfo::SIMachineFunction
WorkItemIDX = true;
}
- if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
WorkGroupIDY = true;
- if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
WorkGroupIDZ = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
WorkItemIDY = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
WorkItemIDZ = true;
// X, XY, and XYZ are the only supported combinations, so make sure Y is
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=273769&r1=273768&r2=273769&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Fri Jun 24 22:11:28 2016
@@ -64,6 +64,10 @@ class SIMachineFunctionInfo final : publ
// Number of reserved VGPRs for debugger usage.
unsigned DebuggerReservedVGPRCount;
+ // Stack object indices for work group IDs.
+ int DebuggerWorkGroupIDStackObjectIndices[3];
+ // Stack object indices for work item IDs.
+ int DebuggerWorkItemIDStackObjectIndices[3];
public:
// FIXME: Make private
@@ -334,6 +338,62 @@ public:
return DebuggerReservedVGPRCount;
}
+ /// \returns Stack object index for \p Dim's work group ID.
+ int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
+ assert(Dim < 3);
+ return DebuggerWorkGroupIDStackObjectIndices[Dim];
+ }
+
+ /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
+ void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
+ assert(Dim < 3);
+ DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
+ }
+
+ /// \returns Stack object index for \p Dim's work item ID.
+ int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
+ assert(Dim < 3);
+ return DebuggerWorkItemIDStackObjectIndices[Dim];
+ }
+
+ /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
+ void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
+ assert(Dim < 3);
+ DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
+ }
+
+ /// \returns SGPR used for \p Dim's work group ID.
+ unsigned getWorkGroupIDSGPR(unsigned Dim) const {
+ switch (Dim) {
+ case 0:
+ assert(hasWorkGroupIDX());
+ return WorkGroupIDXSystemSGPR;
+ case 1:
+ assert(hasWorkGroupIDY());
+ return WorkGroupIDYSystemSGPR;
+ case 2:
+ assert(hasWorkGroupIDZ());
+ return WorkGroupIDZSystemSGPR;
+ }
+ llvm_unreachable("unexpected dimension");
+ }
+
+ /// \returns VGPR used for \p Dim's work item ID.
+ unsigned getWorkItemIDVGPR(unsigned Dim) const {
+ switch (Dim) {
+ case 0:
+ assert(hasWorkItemIDX());
+ return AMDGPU::VGPR0;
+ case 1:
+ assert(hasWorkItemIDY());
+ return AMDGPU::VGPR1;
+ case 2:
+ assert(hasWorkItemIDZ());
+ return AMDGPU::VGPR2;
+ }
+ llvm_unreachable("unexpected dimension");
+ }
+
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};
Added: llvm/trunk/test/CodeGen/AMDGPU/debugger-emit-prologue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/debugger-emit-prologue.ll?rev=273769&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/debugger-emit-prologue.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/debugger-emit-prologue.ll Fri Jun 24 22:11:28 2016
@@ -0,0 +1,80 @@
+; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-emit-prologue -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s --check-prefix=NOATTR
+
+; CHECK: debug_wavefront_private_segment_offset_sgpr = [[SOFF:[0-9]+]]
+; CHECK: debug_private_segment_buffer_sgpr = [[SREG:[0-9]+]]
+
+; CHECK: v_mov_b32_e32 [[WGIDX:v[0-9]+]], s{{[0-9]+}}
+; CHECK: buffer_store_dword [[WGIDX]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]]
+; CHECK: buffer_store_dword v0, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:16
+
+; CHECK: v_mov_b32_e32 [[WGIDY:v[0-9]+]], s{{[0-9]+}}
+; CHECK: buffer_store_dword [[WGIDY]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:4
+; CHECK: buffer_store_dword v1, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:20
+
+; CHECK: v_mov_b32_e32 [[WGIDZ:v[0-9]+]], s{{[0-9]+}}
+; CHECK: buffer_store_dword [[WGIDZ]], off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:8
+; CHECK: buffer_store_dword v2, off, s[{{[0-9]+:[0-9]+}}], s[[SOFF]] offset:24
+
+; CHECK: DebuggerWavefrontPrivateSegmentOffsetSGPR: s[[SOFF]]
+; CHECK: DebuggerPrivateSegmentBufferSGPR: s[[SREG]]
+
+; NOATTR-NOT: DebuggerWavefrontPrivateSegmentOffsetSGPR
+; NOATTR-NOT: DebuggerPrivateSegmentBufferSGPR
+
+; Function Attrs: nounwind
+define void @test(i32 addrspace(1)* %A) #0 !dbg !12 {
+entry:
+ %A.addr = alloca i32 addrspace(1)*, align 4
+ store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
+ store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
+ %1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
+ store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23
+ %2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !24
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24
+ store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25
+ ret void, !dbg !26
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!opencl.kernels = !{!3}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 269772)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "test01.cl", directory: "/home/kzhuravl/Lightning/testing")
+!2 = !{}
+!3 = !{void (i32 addrspace(1)*)* @test, !4, !5, !6, !7, !8}
+!4 = !{!"kernel_arg_addr_space", i32 1}
+!5 = !{!"kernel_arg_access_qual", !"none"}
+!6 = !{!"kernel_arg_type", !"int*"}
+!7 = !{!"kernel_arg_base_type", !"int*"}
+!8 = !{!"kernel_arg_type_qual", !""}
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.9.0 (trunk 269772)"}
+!12 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!13 = !DISubroutineType(types: !14)
+!14 = !{null, !15}
+!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32)
+!16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15)
+!18 = !DIExpression()
+!19 = !DILocation(line: 1, column: 30, scope: !12)
+!20 = !DILocation(line: 2, column: 3, scope: !12)
+!21 = !DILocation(line: 2, column: 8, scope: !12)
+!22 = !DILocation(line: 3, column: 3, scope: !12)
+!23 = !DILocation(line: 3, column: 8, scope: !12)
+!24 = !DILocation(line: 4, column: 3, scope: !12)
+!25 = !DILocation(line: 4, column: 8, scope: !12)
+!26 = !DILocation(line: 5, column: 1, scope: !12)
More information about the llvm-commits
mailing list