[PATCH] D64807: AMDGPU/GFX10: Apply the VMEM-to-scalar-write hazard also to writes to EXEC
Nicolai Hähnle via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 10:31:27 PDT 2019
nhaehnle updated this revision to Diff 210131.
nhaehnle added a comment.
Add the missing test changes.
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D64807/new/
https://reviews.llvm.org/D64807
Files:
lib/Target/AMDGPU/GCNHazardRecognizer.cpp
test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
test/CodeGen/AMDGPU/wave32.ll
Index: test/CodeGen/AMDGPU/wave32.ll
===================================================================
--- test/CodeGen/AMDGPU/wave32.ll
+++ test/CodeGen/AMDGPU/wave32.ll
@@ -1073,6 +1073,7 @@
; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}}
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT: v_nop
; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]]
@@ -1095,6 +1096,7 @@
; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT: v_nop
; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
Index: test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
===================================================================
--- test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
+++ test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
@@ -92,6 +92,7 @@
...
# GCN-LABEL: name: vmem_write_exec_impread
# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN: V_NOP
# GCN-NEXT: S_MOV_B64
---
name: vmem_write_exec_impread
@@ -208,3 +209,16 @@
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
S_BRANCH %bb.0
...
+# GCN-LABEL: name: ds_write_exec
+# GCN: DS_WRITE_B32_gfx9
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: ds_write_exec
+body: |
+ bb.0:
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = IMPLICIT_DEF
+ DS_WRITE_B32_gfx9 $vgpr0, $vgpr1, 0, 0, implicit $exec
+ $exec_lo = S_MOV_B32 -1
+...
Index: test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
+++ test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
@@ -16,6 +16,7 @@
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: flat_store_dword v[0:1], v0
; GCN-NEXT: BB0_2: ; %bb
+; GCN-NEXT: v_nop
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
Index: lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -920,7 +920,7 @@
for (const MachineOperand &Def : MI->defs()) {
MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
- if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
+ if (!Op)
continue;
return true;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D64807.210131.patch
Type: text/x-patch
Size: 2834 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190716/b8b7cfd6/attachment.bin>
More information about the llvm-commits mailing list