[PATCH] D22210: AMDGPU: Treat texture gather instructions more like other MIMG instructions

Nicolai Hähnle via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 11 03:45:23 PDT 2016


nhaehnle created this revision.
nhaehnle added reviewers: arsenm, tstellarAMD.
nhaehnle added a subscriber: llvm-commits.
Herald added subscribers: kzhuravl, arsenm.

Setting MIMG to 0 has a number of unexpected side effects. One is that
isVMEM returns false, which leads to incorrect treatment in the hazard
recognizer. I noticed the problem because it also leads to incorrect
treatment of VGPR-to-SGPR copies, which is one cause of the referenced bug.

The only reason MIMG was set to 0 was to signal the special handling of
dmasks, but that can be checked differently: this patch tests
hasPostISelHook before adjusting the writemask instead.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96877

http://reviews.llvm.org/D22210

Files:
  lib/Target/AMDGPU/SIISelLowering.cpp
  lib/Target/AMDGPU/SIInstrInfo.td
  test/CodeGen/AMDGPU/llvm.SI.gather4.ll

Index: test/CodeGen/AMDGPU/llvm.SI.gather4.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.gather4.ll
+++ test/CodeGen/AMDGPU/llvm.SI.gather4.ll
@@ -462,7 +462,20 @@
   ret void
 }
 
-
+;CHECK-LABEL: {{^}}gather4_sgpr_bug:
+;
+; This crashed at some point due to a bug in FixSGPRCopies. Derived from the
+; report in https://bugs.freedesktop.org/show_bug.cgi?id=96877
+;
+define amdgpu_ps float @gather4_sgpr_bug() {
+main_body:
+  %tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef, align 16
+  %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
+  %tmp2 = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> %tmp1, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp4 = extractelement <4 x float> %tmp2, i32 1
+  %tmp9 = fadd float undef, %tmp4
+  ret float %tmp9
+}
 
 declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
 declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -3557,8 +3557,7 @@
   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
   // (red,red,red,red) etc.) The ISA document doesn't mention
   // this.
-  // Therefore, disable all code which updates DMASK by setting these two:
-  let MIMG = 0;
+  // Therefore, disable all code which updates DMASK by setting this:
   let hasPostISelHook = 0;
   let WQM = wqm;
 
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3133,7 +3133,8 @@
   const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
   unsigned Opcode = Node->getMachineOpcode();
 
-  if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore())
+  if (TII->isMIMG(Opcode) && TII->get(Opcode).hasPostISelHook() &&
+      !TII->get(Opcode).mayStore())
     adjustWritemask(Node, DAG);
 
   if (Opcode == AMDGPU::INSERT_SUBREG ||


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D22210.63483.patch
Type: text/x-patch
Size: 2228 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160711/31a2202e/attachment.bin>


More information about the llvm-commits mailing list