[llvm] r275113 - AMDGPU: Treat texture gather instructions more like other MIMG instructions

Nicolai Haehnle via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 11 14:59:44 PDT 2016


Author: nha
Date: Mon Jul 11 16:59:43 2016
New Revision: 275113

URL: http://llvm.org/viewvc/llvm-project?rev=275113&view=rev
Log:
AMDGPU: Treat texture gather instructions more like other MIMG instructions

Summary:
Setting MIMG to 0 has a number of unexpected side effects. One is that
isVMEM returns false, which leads to incorrect treatment in the hazard
recognizer. The reason I noticed it is that it also leads to incorrect
treatment of VGPR-to-SGPR copies, which is one cause of the referenced bug.

The only reason MIMG was set to 0 was to signal the special handling of
dmasks, but that can be checked differently.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96877

Reviewers: arsenm, tstellarAMD

Subscribers: arsenm, kzhuravl, llvm-commits

Differential Revision: http://reviews.llvm.org/D22210

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIDefines.h
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=275113&r1=275112&r2=275113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Mon Jul 11 16:59:43 2016
@@ -40,7 +40,8 @@ enum {
   FLAT = 1 << 21,
   WQM = 1 << 22,
   VGPRSpill = 1 << 23,
-  VOPAsmPrefer32Bit = 1 << 24
+  VOPAsmPrefer32Bit = 1 << 24,
+  Gather4 = 1 << 25
 };
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=275113&r1=275112&r2=275113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jul 11 16:59:43 2016
@@ -3133,7 +3133,8 @@ SDNode *SITargetLowering::PostISelFoldin
   const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
   unsigned Opcode = Node->getMachineOpcode();
 
-  if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore())
+  if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
+      !TII->isGather4(Opcode))
     adjustWritemask(Node, DAG);
 
   if (Opcode == AMDGPU::INSERT_SUBREG ||

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=275113&r1=275112&r2=275113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Mon Jul 11 16:59:43 2016
@@ -48,6 +48,8 @@ class InstSI <dag outs, dag ins, string
   // is unable to infer the encoding from the operands.
   field bits<1> VOPAsmPrefer32Bit = 0;
 
+  field bits<1> Gather4 = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = VM_CNT;
   let TSFlags{1} = EXP_CNT;
@@ -78,6 +80,7 @@ class InstSI <dag outs, dag ins, string
   let TSFlags{22} = WQM;
   let TSFlags{23} = VGPRSpill;
   let TSFlags{24} = VOPAsmPrefer32Bit;
+  let TSFlags{25} = Gather4;
 
   let SchedRW = [Write32Bit];
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=275113&r1=275112&r2=275113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Mon Jul 11 16:59:43 2016
@@ -310,6 +310,14 @@ public:
     return get(Opcode).TSFlags & SIInstrFlags::MIMG;
   }
 
+  static bool isGather4(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
+  }
+
+  bool isGather4(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
+  }
+
   static bool isFLAT(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
   }

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=275113&r1=275112&r2=275113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Mon Jul 11 16:59:43 2016
@@ -3557,8 +3557,8 @@ class MIMG_Gather_Helper <bits<7> op, st
   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
   // (red,red,red,red) etc.) The ISA document doesn't mention
   // this.
-  // Therefore, disable all code which updates DMASK by setting these two:
-  let MIMG = 0;
+  // Therefore, disable all code which updates DMASK by setting this:
+  let Gather4 = 1;
   let hasPostISelHook = 0;
   let WQM = wqm;
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll?rev=275113&r1=275112&r2=275113&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.SI.gather4.ll Mon Jul 11 16:59:43 2016
@@ -462,7 +462,27 @@ main_body:
   ret void
 }
 
-
+;CHECK-LABEL: {{^}}gather4_sgpr_bug:
+;
+; This crashed at some point due to a bug in FixSGPRCopies. Derived from the
+; report in https://bugs.freedesktop.org/show_bug.cgi?id=96877
+;
+;TODO: the readfirstlanes are unnecessary, see http://reviews.llvm.org/D22217
+;
+;CHECK: v_readfirstlane_b32 s[[LO:[0-9]+]], v{{[0-9]+}}
+;CHECK: v_readfirstlane_b32
+;CHECK: v_readfirstlane_b32
+;CHECK: v_readfirstlane_b32 s[[HI:[0-9]+]], v{{[0-9]+}}
+;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]] dmask:0x8
+define amdgpu_ps float @gather4_sgpr_bug() {
+main_body:
+  %tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef, align 16
+  %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
+  %tmp2 = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> %tmp1, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp4 = extractelement <4 x float> %tmp2, i32 1
+  %tmp9 = fadd float undef, %tmp4
+  ret float %tmp9
+}
 
 declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
 declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0




More information about the llvm-commits mailing list