[llvm] r267563 - [AMDGPU] Reserve VGPRs for trap handler usage if instructed

Konstantin Zhuravlyov via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 26 08:43:15 PDT 2016


Author: kzhuravl
Date: Tue Apr 26 10:43:14 2016
New Revision: 267563

URL: http://llvm.org/viewvc/llvm-project?rev=267563&view=rev
Log:
[AMDGPU] Reserve VGPRs for trap handler usage if instructed

Differential Revision: http://reviews.llvm.org/D19235

Added:
    llvm/trunk/test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=267563&r1=267562&r2=267563&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Tue Apr 26 10:43:14 2016
@@ -328,6 +328,13 @@ def FeatureDebuggerInsertNops : Subtarge
   "Insert two nop instructions for each high level source statement"
 >;
 
+def FeatureDebuggerReserveTrapRegs : SubtargetFeature<
+  "amdgpu-debugger-reserve-trap-regs",
+  "DebuggerReserveTrapVGPRs",
+  "true",
+  "Reserve VGPRs for trap handler usage"
+>;
+
 //===----------------------------------------------------------------------===//
 
 def AMDGPUInstrInfo : InstrInfo {

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=267563&r1=267562&r2=267563&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Tue Apr 26 10:43:14 2016
@@ -235,6 +235,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunct
       OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
                                   " bytes/workgroup (compile time only)", false);
 
+      OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst),
+                                  false);
+      OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
+                                  false);
+
       OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
                                   Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
                                   false);
@@ -472,6 +477,14 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
 
   MaxSGPR += ExtraSGPRs;
 
+  // Update necessary Reserved* fields and max VGPRs used if
+  // "amdgpu-debugger-reserve-trap-regs" was specified.
+  if (STM.debuggerReserveTrapVGPRs()) {
+    ProgInfo.ReservedVGPRFirst = MaxVGPR + 1;
+    ProgInfo.ReservedVGPRCount = STM.debuggerReserveTrapVGPRCount();
+    MaxVGPR += STM.debuggerReserveTrapVGPRCount();
+  }
+
   // We found the maximum register index. They start at 0, so add one to get the
   // number of registers.
   ProgInfo.NumVGPR = MaxVGPR + 1;
@@ -694,6 +707,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCode
   header.workitem_vgpr_count = KernelInfo.NumVGPR;
   header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
   header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
+  header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
+  header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
 
   AMDGPUTargetStreamer *TS =
       static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h?rev=267563&r1=267562&r2=267563&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h Tue Apr 26 10:43:14 2016
@@ -40,6 +40,8 @@ private:
       NumVGPR(0),
       NumSGPR(0),
       FlatUsed(false),
+      ReservedVGPRFirst(0),
+      ReservedVGPRCount(0),
       VCCUsed(false),
       CodeLen(0) {}
 
@@ -67,6 +69,9 @@ private:
     uint32_t LDSSize;
     bool FlatUsed;
 
+    uint16_t ReservedVGPRFirst;
+    uint16_t ReservedVGPRCount;
+
     // Bonus information for debugging.
     bool VCCUsed;
     uint64_t CodeLen;

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=267563&r1=267562&r2=267563&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Tue Apr 26 10:43:14 2016
@@ -98,7 +98,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
       LDSBankCount(0),
       IsaVersion(ISAVersion0_0_0),
       EnableSIScheduler(false),
-      DebuggerInsertNops(false),
+      DebuggerInsertNops(false), DebuggerReserveTrapVGPRs(false),
       FrameLowering(nullptr),
       GISel(),
       InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=267563&r1=267562&r2=267563&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Tue Apr 26 10:43:14 2016
@@ -96,6 +96,7 @@ private:
   unsigned IsaVersion;
   bool EnableSIScheduler;
   bool DebuggerInsertNops;
+  bool DebuggerReserveTrapVGPRs;
 
   std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
   std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -309,6 +310,14 @@ public:
     return DebuggerInsertNops;
   }
 
+  bool debuggerReserveTrapVGPRs() const {
+    return DebuggerReserveTrapVGPRs;
+  }
+
+  unsigned debuggerReserveTrapVGPRCount() const {
+    return debuggerReserveTrapVGPRs() ? 4 : 0;
+  }
+
   bool dumpCode() const {
     return DumpCode;
   }

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=267563&r1=267562&r2=267563&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Tue Apr 26 10:43:14 2016
@@ -193,6 +193,17 @@ BitVector SIRegisterInfo::getReservedReg
     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
   }
 
+  // Reserve VGPRs for trap handler usage if "amdgpu-debugger-reserve-trap-regs"
+  // attribute was specified.
+  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  if (ST.debuggerReserveTrapVGPRs()) {
+    for (unsigned i = MaxWorkGroupVGPRCount - ST.debuggerReserveTrapVGPRCount();
+           i < MaxWorkGroupVGPRCount; ++i) {
+      unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
+      reserveRegisterTuples(Reserved, Reg);
+    }
+  }
+
   return Reserved;
 }
 

Added: llvm/trunk/test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll?rev=267563&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll Tue Apr 26 10:43:14 2016
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-trap-regs -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK: reserved_vgpr_count = 4
+; CHECK: ReservedVGPRCount: 4
+
+; Function Attrs: nounwind
+define void @debugger_reserve_trap_regs(i32 addrspace(1)* %A) #0 {
+entry:
+  %A.addr = alloca i32 addrspace(1)*, align 4
+  store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
+  %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0
+  store i32 1, i32 addrspace(1)* %arrayidx, align 4
+  %1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1
+  store i32 2, i32 addrspace(1)* %arrayidx1, align 4
+  %2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2
+  store i32 3, i32 addrspace(1)* %arrayidx2, align 4
+  %3 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 4
+  store i32 4, i32 addrspace(1)* %arrayidx3, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!opencl.kernels = !{!0}
+!llvm.ident = !{!6}
+
+!0 = !{void (i32 addrspace(1)*)* @debugger_reserve_trap_regs, !1, !2, !3, !4, !5}
+!1 = !{!"kernel_arg_addr_space", i32 1}
+!2 = !{!"kernel_arg_access_qual", !"none"}
+!3 = !{!"kernel_arg_type", !"int*"}
+!4 = !{!"kernel_arg_base_type", !"int*"}
+!5 = !{!"kernel_arg_type_qual", !""}
+!6 = !{!"clang version 3.9.0 (trunk 266639)"}




More information about the llvm-commits mailing list