[llvm] r314828 - [AMDGPU] Avoid predicated execution of the basic blocks containing scalar
Alexander Timofeev via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 3 11:55:36 PDT 2017
Author: alex-t
Date: Tue Oct 3 11:55:36 2017
New Revision: 314828
URL: http://llvm.org/viewvc/llvm-project?rev=314828&view=rev
Log:
[AMDGPU] Avoid predicated execution of the basic blocks containing scalar
instructions.
Differential revision: https://reviews.llvm.org/D38293
Added:
llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp?rev=314828&r1=314827&r2=314828&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp Tue Oct 3 11:55:36 2017
@@ -132,6 +132,16 @@ bool SIInsertSkips::shouldSkip(const Mac
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;
+ // The destination register of V_READFIRSTLANE/V_READLANE may be used as an
+ // operand by some SALU instruction. If the exec mask is zero, the vector
+ // instruction defining that register is not executed, and the scalar
+ // instruction using it will operate on undefined data. For
+ // V_READFIRSTLANE/V_READLANE we should therefore avoid predicated execution.
+ if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
+ (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+ return true;
+ }
+
if (I->isInlineAsm()) {
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
const char *AsmStr = I->getOperand(0).getSymbolName();
Added: llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir?rev=314828&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir Tue Oct 3 11:55:36 2017
@@ -0,0 +1,32 @@
+# RUN: llc -o - %s -march=amdgcn -mcpu=fiji -run-pass=si-insert-skips -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: readlane_exec0
+# GCN: bb.0
+# GCN: S_CBRANCH_EXECZ %bb.2
+
+---
+name: readlane_exec0
+
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: %vgpr1_vgpr2:0x00000001, %vgpr2_vgpr3:0x00000003
+
+ %vgpr4 = V_AND_B32_e32 1, %vgpr1, implicit %exec
+ V_CMP_EQ_U32_e32 1, killed %vgpr4, implicit-def %vcc, implicit %exec
+ %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
+ SI_MASK_BRANCH %bb.2, implicit %exec
+ S_BRANCH %bb.1
+
+ bb.1:
+
+ %sgpr10 = V_READFIRSTLANE_B32 %vgpr2, implicit %exec
+ %sgpr11 = V_READFIRSTLANE_B32 %vgpr3, implicit %exec
+ %sgpr10 = S_LOAD_DWORD_IMM killed %sgpr10_sgpr11, 0, 0
+ S_WAITCNT 127
+ %vgpr0 = V_XOR_B32_e32 killed %sgpr10, killed %vgpr0, implicit %exec
+
+ bb.2:
+
+ %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
+...
More information about the llvm-commits
mailing list