[llvm] r256870 - AMDGPU/SI: Do not move scratch resource register on Tonga & Iceland

Tue Jan 5 12:42:49 PST 2016

Author: nha
Date: Tue Jan  5 14:42:49 2016
New Revision: 256870

URL: http://llvm.org/viewvc/llvm-project?rev=256870&view=rev
Log:
AMDGPU/SI: Do not move scratch resource register on Tonga & Iceland

Due to the SGPR init bug, every program claims to use the same number
of SGPRs anyway, so there's no point in trying to shift those registers
down from their initial spot of reservation.

Add a test that uses VGPR spilling and blocks most SGPRs from being used for
the scratch resource register. Previously, this would run into an assertion.

Differential Revision: http://reviews.llvm.org/D15724

Added:
    llvm/trunk/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll
    llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=256870&r1=256869&r2=256870&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Tue Jan  5 14:42:49 2016
@@ -105,51 +105,53 @@ void SIFrameLowering::emitPrologue(Machi
     MBB.addLiveIn(PreloadedPrivateBufferReg);
   }
 
-  // We reserved the last registers for this. Shift it down to the end of those
-  // which were actually used.
-  //
-  // FIXME: It might be safer to use a pseudoregister before replacement.
-
-  // FIXME: We should be able to eliminate unused input registers. We only
-  // cannot do this for the resources required for scratch access. For now we
-  // skip over user SGPRs and may leave unused holes.
-
-  // We find the resource first because it has an alignment requirement.
-  if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
-    MachineRegisterInfo &MRI = MF.getRegInfo();
-
-    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
-    // Skip the last 2 elements because the last one is reserved for VCC, and
-    // this is the 2nd to last element already.
-    for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
-      // Pick the first unallocated one. Make sure we don't clobber the other
-      // reserved input we needed.
-      if (!MRI.isPhysRegUsed(Reg)) {
-        assert(MRI.isAllocatable(Reg));
-        MRI.replaceRegWith(ScratchRsrcReg, Reg);
-        ScratchRsrcReg = Reg;
-        MFI->setScratchRSrcReg(ScratchRsrcReg);
-        break;
+  if (!ST.hasSGPRInitBug()) {
+    // We reserved the last registers for this. Shift it down to the end of those
+    // which were actually used.
+    //
+    // FIXME: It might be safer to use a pseudoregister before replacement.
+
+    // FIXME: We should be able to eliminate unused input registers. We only
+    // cannot do this for the resources required for scratch access. For now we
+    // skip over user SGPRs and may leave unused holes.
+
+    // We find the resource first because it has an alignment requirement.
+    if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
+      MachineRegisterInfo &MRI = MF.getRegInfo();
+
+      unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
+      // Skip the last 2 elements because the last one is reserved for VCC, and
+      // this is the 2nd to last element already.
+      for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
+        // Pick the first unallocated one. Make sure we don't clobber the other
+        // reserved input we needed.
+        if (!MRI.isPhysRegUsed(Reg)) {
+          assert(MRI.isAllocatable(Reg));
+          MRI.replaceRegWith(ScratchRsrcReg, Reg);
+          ScratchRsrcReg = Reg;
+          MFI->setScratchRSrcReg(ScratchRsrcReg);
+          break;
+        }
       }
     }
-  }
 
-  if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
-    MachineRegisterInfo &MRI = MF.getRegInfo();
-    // Skip the last 2 elements because the last one is reserved for VCC, and
-    // this is the 2nd to last element already.
-    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
-    for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
-      // Pick the first unallocated SGPR. Be careful not to pick an alias of the
-      // scratch descriptor, since we havenât added its uses yet.
-      if (!MRI.isPhysRegUsed(Reg)) {
-        assert(MRI.isAllocatable(Reg) &&
-               !TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
-
-        MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
-        ScratchWaveOffsetReg = Reg;
-        MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
-        break;
+    if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
+      MachineRegisterInfo &MRI = MF.getRegInfo();
+      // Skip the last 2 elements because the last one is reserved for VCC, and
+      // this is the 2nd to last element already.
+      unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
+      for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
+        // Pick the first unallocated SGPR. Be careful not to pick an alias of the
+        // scratch descriptor, since we havenât added its uses yet.
+        if (!MRI.isPhysRegUsed(Reg)) {
+          assert(MRI.isAllocatable(Reg) &&
+                !TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
+
+          MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+          ScratchWaveOffsetReg = Reg;
+          MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+          break;
+        }
       }
     }
   }

Modified: llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll?rev=256870&r1=256869&r2=256870&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll Tue Jan  5 14:42:49 2016
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=carrizo < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
 ; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
 
 ; FIXME: align on alloca seems to be ignored for private_segment_alignment
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll?rev=256870&r1=256869&r2=256870&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll Tue Jan  5 14:42:49 2016
@@ -1,5 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=carrizo < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
 
 ; ALL-LABEL: {{^}}large_alloca_pixel_shader:
 ; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0

Added: llvm/trunk/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll?rev=256870&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll Tue Jan  5 14:42:49 2016
@@ -0,0 +1,24 @@
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=TONGA %s
+
+; On Tonga and Iceland, limited SGPR availability means care must be taken to
+; allocate scratch registers correctly. Check that this test compiles without
+; error.
+; TONGA-LABEL: test
+define void @test(<256 x i32> addrspace(1)* %out, <256 x i32> addrspace(1)* %in) {
+entry:
+  %tid = call i32 @llvm.SI.tid() nounwind readnone
+  %aptr = getelementptr <256 x i32>, <256 x i32> addrspace(1)* %in, i32 %tid
+  %a = load <256 x i32>, <256 x i32> addrspace(1)* %aptr
+  call void asm sideeffect "", "~{memory}" ()
+  %outptr = getelementptr <256 x i32>, <256 x i32> addrspace(1)* %in, i32 %tid
+  store <256 x i32> %a, <256 x i32> addrspace(1)* %outptr
+
+; mark 128-bit SGPR registers as used so they are unavailable for the
+; scratch resource descriptor
+  call void asm sideeffect "", "~{SGPR4},~{SGPR8},~{SGPR12},~{SGPR16},~{SGPR20},~{SGPR24},~{SGPR28}" ()
+  call void asm sideeffect "", "~{SGPR32},~{SGPR36},~{SGPR40},~{SGPR44},~{SGPR48},~{SGPR52},~{SGPR56}" ()
+  call void asm sideeffect "", "~{SGPR60},~{SGPR64},~{SGPR68}" ()
+  ret void
+}
+
+declare i32 @llvm.SI.tid() nounwind readnone