[PATCH] D22210: AMDGPU: Treat texture gather instructions more like other MIMG instructions
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 11 14:29:29 PDT 2016
nhaehnle updated this revision to Diff 63575.
nhaehnle marked 2 inline comments as done.
nhaehnle added a comment.
The trouble is that the operand is named "dmask" for all image sample _and_
gather4 instructions :)
Added the Gather4 flag.
http://reviews.llvm.org/D22210
Files:
lib/Target/AMDGPU/SIDefines.h
lib/Target/AMDGPU/SIISelLowering.cpp
lib/Target/AMDGPU/SIInstrFormats.td
lib/Target/AMDGPU/SIInstrInfo.td
test/CodeGen/AMDGPU/llvm.SI.gather4.ll
Index: test/CodeGen/AMDGPU/llvm.SI.gather4.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.gather4.ll
+++ test/CodeGen/AMDGPU/llvm.SI.gather4.ll
@@ -462,7 +462,27 @@
ret void
}
-
+;CHECK-LABEL: {{^}}gather4_sgpr_bug:
+;
+; This crashed at some point due to a bug in FixSGPRCopies. Derived from the
+; report in https://bugs.freedesktop.org/show_bug.cgi?id=96877
+;
+;TODO: the readfirstlanes are unnecessary, see http://reviews.llvm.org/D22217
+;
+;CHECK: v_readfirstlane_b32 s[[LO:[0-9]+]], v{{[0-9]+}}
+;CHECK: v_readfirstlane_b32
+;CHECK: v_readfirstlane_b32
+;CHECK: v_readfirstlane_b32 s[[HI:[0-9]+]], v{{[0-9]+}}
+;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]] dmask:0x8
+define amdgpu_ps float @gather4_sgpr_bug() {
+main_body:
+ %tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef, align 16
+ %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
+ %tmp2 = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> %tmp1, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tmp4 = extractelement <4 x float> %tmp2, i32 1
+ %tmp9 = fadd float undef, %tmp4
+ ret float %tmp9
+}
declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -3557,8 +3557,8 @@
// 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
// (red,red,red,red) etc.) The ISA document doesn't mention
// this.
- // Therefore, disable all code which updates DMASK by setting these two:
- let MIMG = 0;
+ // Therefore, disable all code which updates DMASK by setting this:
+ let Gather4 = 1;
let hasPostISelHook = 0;
let WQM = wqm;
Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -48,6 +48,8 @@
// is unable to infer the encoding from the operands.
field bits<1> VOPAsmPrefer32Bit = 0;
+ field bits<1> Gather4 = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
@@ -78,6 +80,7 @@
let TSFlags{22} = WQM;
let TSFlags{23} = VGPRSpill;
let TSFlags{24} = VOPAsmPrefer32Bit;
+ let TSFlags{25} = Gather4;
let SchedRW = [Write32Bit];
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3133,7 +3133,8 @@
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();
- if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore())
+ if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
+ !(TII->get(Opcode).TSFlags & SIInstrFlags::Gather4))
adjustWritemask(Node, DAG);
if (Opcode == AMDGPU::INSERT_SUBREG ||
Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -40,7 +40,8 @@
FLAT = 1 << 21,
WQM = 1 << 22,
VGPRSpill = 1 << 23,
- VOPAsmPrefer32Bit = 1 << 24
+ VOPAsmPrefer32Bit = 1 << 24,
+ Gather4 = 1 << 25
};
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D22210.63575.patch
Type: text/x-patch
Size: 3639 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160711/3ffceed2/attachment.bin>
More information about the llvm-commits
mailing list