[llvm] r275108 - AMDGPU: fix local stack slot allocation bugs

Nicolai Haehnle via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 11 14:44:40 PDT 2016


Author: nha
Date: Mon Jul 11 16:44:40 2016
New Revision: 275108

URL: http://llvm.org/viewvc/llvm-project?rev=275108&view=rev
Log:
AMDGPU: fix local stack slot allocation bugs

Summary:
The main bug fix here is using the 32-bit encoding of V_ADD_I32 in
materializeFrameBaseRegister and resolveFrameIndex, so that arbitrary
immediates work.

The second part is that we may now require the SegmentWaveByteOffset
even when there are initially no stack objects and VGPR spilling isn't
enabled, for stack slots that are allocated later. This means that some
bits become effectively dead and can be cleaned up.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96602
Tested-by: Kai Wasserbäch <kai at dev.carbon-project.org>

Reviewers: arsenm, tstellarAMD

Subscribers: arsenm, llvm-commits, kzhuravl

Differential Revision: http://reviews.llvm.org/D21551

Added:
    llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
    llvm/trunk/test/CodeGen/AMDGPU/selected-stack-object.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=275108&r1=275107&r2=275108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Mon Jul 11 16:44:40 2016
@@ -285,10 +285,13 @@ void SIRegisterInfo::materializeFrameBas
 
   MachineRegisterInfo &MRI = MF->getRegInfo();
   unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
+  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
+    .addImm(Offset);
   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
     .addReg(UnusedCarry, RegState::Define | RegState::Dead)
-    .addImm(Offset)
+    .addReg(OffsetReg, RegState::Kill)
     .addFrameIndex(FrameIdx);
 }
 
@@ -335,13 +338,16 @@ void SIRegisterInfo::resolveFrameIndex(M
   assert(Offset != 0 && "Non-zero offset expected");
 
   unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
   // In the case the instruction already had an immediate offset, here only
   // the requested new offset is added because we are leaving the original
   // immediate in place.
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
+    .addImm(Offset);
   BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), NewReg)
     .addReg(UnusedCarry, RegState::Define | RegState::Dead)
-    .addImm(Offset)
+    .addReg(OffsetReg, RegState::Kill)
     .addReg(BaseReg);
 
   FIOp->ChangeToRegister(NewReg, false);

Added: llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll?rev=275108&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-stack-slot-bug.ll Mon Jul 11 16:44:40 2016
@@ -0,0 +1,22 @@
+; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s
+
+; This used to fail due to a v_add_i32 instruction with an illegal immediate
+; operand that was created during Local Stack Slot Allocation. Test case derived
+; from https://bugs.freedesktop.org/show_bug.cgi?id=96602
+;
+; CHECK-LABEL: {{^}}main:
+; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
+; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200
+; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0
+; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]]
+; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]]
+; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
+; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
+define amdgpu_ps float @main(i32 %idx) {
+main_body:
+  %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD
 5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
+  %v2 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FE
 A477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
+  %r = fadd float %v1, %v2
+  ret float %r
+}

Added: llvm/trunk/test/CodeGen/AMDGPU/selected-stack-object.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/selected-stack-object.ll?rev=275108&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/selected-stack-object.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/selected-stack-object.ll Mon Jul 11 16:44:40 2016
@@ -0,0 +1,12 @@
+; XFAIL: *
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+
+; See also local-stack-slot-bug.ll
+; This fails because a stack object is created during instruction selection.
+
+; CHECK-LABEL: {{^}}main:
+define amdgpu_ps float @main(i32 %idx) {
+main_body:
+  %v1 = extractelement <81 x float> <float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD
 5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
+  ret float %v1
+}




More information about the llvm-commits mailing list