[llvm] r343672 - [AMDGPU] Fix for negative offsets in buffer/tbuffer intrinsics

Wed Oct 3 03:29:43 PDT 2018

Author: tpr
Date: Wed Oct  3 03:29:43 2018
New Revision: 343672

URL: http://llvm.org/viewvc/llvm-project?rev=343672&view=rev
Log:
[AMDGPU] Fix for negative offsets in buffer/tbuffer intrinsics

Summary:
The new buffer/tbuffer intrinsics handle an out-of-range immediate
offset by moving/adding offset&-4096 to a vgpr, leaving an in-range
immediate offset, with a chance of the move/add being CSEd for similar
loads/stores.

However it turns out that a negative offset in a vgpr is illegal, even
if adding the immediate offset makes it legal again.

Therefore, this commit disables the offset&-4096 thing if the offset is
negative.

Differential Revision: https://reviews.llvm.org/D52683

Change-Id: Ie02f0a74f240a138dc2a29d17cfbd9e350e4ed13

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=343672&r1=343671&r2=343672&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Oct  3 03:29:43 2018
@@ -5983,11 +5983,18 @@ std::pair<SDValue, SDValue> SITargetLowe
   if (C1) {
     unsigned ImmOffset = C1->getZExtValue();
     // If the immediate value is too big for the immoffset field, put the value
-    // mod 4096 into the immoffset field so that the value that is copied/added
+    // and -4096 into the immoffset field so that the value that is copied/added
     // for the voffset field is a multiple of 4096, and it stands more chance
     // of being CSEd with the copy/add for another similar load/store.
+    // However, do not do that rounding down to a multiple of 4096 if that is a
+    // negative number, as it appears to be illegal to have a negative offset
+    // in the vgpr, even if adding the immediate offset makes it positive.
     unsigned Overflow = ImmOffset & ~MaxImm;
     ImmOffset -= Overflow;
+    if ((int32_t)Overflow < 0) {
+      Overflow += ImmOffset;
+      ImmOffset = 0;
+    }
     C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
     if (Overflow) {
       auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll?rev=343672&r1=343671&r2=343672&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll Wed Oct  3 03:29:43 2018
@@ -74,8 +74,8 @@ main_body:
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_negative_offset:
-;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, 0xfffff000, v0
-;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen offset:4080
+;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
+;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen
 define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
 main_body:
   %ofs.1 = add i32 %ofs, -16

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll?rev=343672&r1=343671&r2=343672&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll Wed Oct  3 03:29:43 2018
@@ -102,8 +102,8 @@ main_body:
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_negative_offset:
-;CHECK: v_add_{{[iu]}}32_e32 {{v[0-9]+}}, vcc, 0xfffff000, v0
-;CHECK: buffer_load_dwordx4 v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen offset:4080
+;CHECK: v_add_{{[iu]}}32_e32 {{v[0-9]+}}, vcc, -16, v0
+;CHECK: buffer_load_dwordx4 v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen
 define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
 main_body:
   %ofs.1 = add i32 %ofs, -16