[PATCH] D39171: AMDGPU: Handle s_buffer_load_dword hazard on SI
Marek Olšák via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 22 14:13:06 PDT 2017
mareko created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng, kzhuravl.
https://reviews.llvm.org/D39171
Files:
lib/Target/AMDGPU/GCNHazardRecognizer.cpp
test/CodeGen/AMDGPU/smrd.ll
Index: test/CodeGen/AMDGPU/smrd.ll
===================================================================
--- test/CodeGen/AMDGPU/smrd.ll
+++ test/CodeGen/AMDGPU/smrd.ll
@@ -85,6 +85,23 @@
ret void
}
+; GCN-LABEL: {{^}}smrd_hazard:
+; GCN-DAG: s_mov_b32 s3, 3
+; GCN-DAG: s_mov_b32 s2, 2
+; GCN-DAG: s_mov_b32 s1, 1
+; GCN-DAG: s_mov_b32 s0, 0
+; SI-NEXT: nop 3
+; GCN-NEXT: s_buffer_load_dword s0, s[0:3], 0x0
+define amdgpu_ps float @smrd_hazard(<4 x i32> inreg %desc) #0 {
+main_body:
+ %d0 = insertelement <4 x i32> undef, i32 0, i32 0
+ %d1 = insertelement <4 x i32> %d0, i32 1, i32 1
+ %d2 = insertelement <4 x i32> %d1, i32 2, i32 2
+ %d3 = insertelement <4 x i32> %d2, i32 3, i32 3
+ %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %d3, i32 0)
+ ret float %r
+}
+
; SMRD load using the load.const.v4i32 intrinsic with an immediate offset
; GCN-LABEL: {{^}}smrd_load_const0:
; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
Index: lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -343,6 +343,36 @@
SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}
+
+ // This fixes what appears to be undocumented hardware behavior in SI where
+ // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
+ // needs some number of nops in between. We don't know how many we need, but
+ // let's use 4. This wasn't discovered before probably because the only
+ // case when this happens is when we expand a 64-bit pointer into a full
+ // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
+ // probably never encountered in the closed-source land.
+ if (SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORD_IMM ||
+ SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM ||
+ SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM ||
+ SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM ||
+ SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORDX16_IMM ||
+ SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORD_SGPR ||
+ SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR ||
+ SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR ||
+ SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR ||
+ SMRD->getOpcode() == AMDGPU::S_BUFFER_LOAD_DWORDX16_SGPR) {
+ int SmrdSgprWaitStates = 4;
+ auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };
+
+ for (const MachineOperand &Use : SMRD->uses()) {
+ if (!Use.isReg())
+ continue;
+ int WaitStatesNeededForUse =
+ SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+ }
+
return WaitStatesNeeded;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D39171.119799.patch
Type: text/x-patch
Size: 3034 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171022/eaf65b1c/attachment.bin>
More information about the llvm-commits
mailing list