[llvm] r359961 - [AMDGPU] gfx1010 hazard recognizer
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri May 3 21:30:58 PDT 2019
Author: rampitec
Date: Fri May 3 21:30:57 2019
New Revision: 359961
URL: http://llvm.org/viewvc/llvm-project?rev=359961&view=rev
Log:
[AMDGPU] gfx1010 hazard recognizer
Differential Revision: https://reviews.llvm.org/D61536
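
In brief (summarizing the diff below): on gfx1010 the hazard recognizer now
handles five new hazards. checkNSAtoVMEMHazard adds a wait state (an S_NOP)
between an NSA-encoded MIMG instruction and a MUBUF/MTBUF access whose
immediate offset has bit 1 or 2 set. fixVMEMtoScalarWriteHazards inserts
v_nop between a VMEM/DS/FLAT instruction and an SALU or SMEM write of a
register it reads. fixSMEMtoVectorWriteHazards inserts "s_mov_b32 null, 0"
between an SMEM read of an SGPR and a VALU write of the same SGPR.
fixVcmpxExecWARHazard inserts "s_waitcnt_depctr 0xfffe" before a VALU that
writes EXEC after a non-VALU read of EXEC. fixLdsBranchVmemWARHazard inserts
"s_waitcnt_vscnt null, 0" between LDS and VMEM accesses separated by a
branch. Targets with hasNoDataDepHazard additionally skip the pre-gfx10
data-dependency checks entirely.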
Added:
llvm/trunk/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
llvm/trunk/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir
llvm/trunk/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir
llvm/trunk/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp?rev=359961&r1=359960&r2=359961&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Fri May 3 21:30:57 2019
@@ -20,6 +20,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -133,6 +134,12 @@ GCNHazardRecognizer::getHazardType(SUnit
&& checkVMEMHazards(MI) > 0)
return NoopHazard;
+ if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
+ return NoopHazard;
+
+ if (ST.hasNoDataDepHazard())
+ return NoHazard;
+
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
return NoopHazard;
@@ -181,6 +188,12 @@ unsigned GCNHazardRecognizer::PreEmitNoo
IsHazardRecognizerMode = true;
CurrCycleInstr = MI;
unsigned W = PreEmitNoopsCommon(MI);
+
+ fixVMEMtoScalarWriteHazards(MI);
+ fixSMEMtoVectorWriteHazards(MI);
+ fixVcmpxExecWARHazard(MI);
+ fixLdsBranchVmemWARHazard(MI);
+
CurrCycleInstr = nullptr;
return W;
}
@@ -191,12 +204,18 @@ unsigned GCNHazardRecognizer::PreEmitNoo
if (SIInstrInfo::isSMRD(*MI))
return std::max(WaitStates, checkSMRDHazards(MI));
- if (SIInstrInfo::isVALU(*MI))
- WaitStates = std::max(WaitStates, checkVALUHazards(MI));
-
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
+ if (ST.hasNSAtoVMEMBug())
+ WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
+
+ if (ST.hasNoDataDepHazard())
+ return WaitStates;
+
+ if (SIInstrInfo::isVALU(*MI))
+ WaitStates = std::max(WaitStates, checkVALUHazards(MI));
+
if (SIInstrInfo::isDPP(*MI))
WaitStates = std::max(WaitStates, checkDPPHazards(MI));
@@ -775,3 +794,243 @@ int GCNHazardRecognizer::checkReadM0Haza
return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
SMovRelWaitStates);
}
+
+bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
+ if (!ST.hasVMEMtoScalarWriteHazard())
+ return false;
+
+ if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
+ return false;
+
+ if (MI->getNumDefs() == 0)
+ return false;
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
+ if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
+ !SIInstrInfo::isFLAT(*I))
+ return false;
+
+ for (const MachineOperand &Def : MI->defs()) {
+ MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
+ if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
+ continue;
+ return true;
+ }
+ return false;
+ };
+
+ auto IsExpiredFn = [] (MachineInstr *MI, int) {
+ return MI && (SIInstrInfo::isVALU(*MI) ||
+ (MI->getOpcode() == AMDGPU::S_WAITCNT &&
+ !MI->getOperand(0).getImm()));
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+ return true;
+}
+
+bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
+ if (!ST.hasSMEMtoVectorWriteHazard())
+ return false;
+
+ if (!SIInstrInfo::isVALU(*MI))
+ return false;
+
+ unsigned SDSTName;
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_READLANE_B32:
+ case AMDGPU::V_READFIRSTLANE_B32:
+ SDSTName = AMDGPU::OpName::vdst;
+ break;
+ default:
+ SDSTName = AMDGPU::OpName::sdst;
+ break;
+ }
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
+ if (!SDST) {
+ for (auto MO : MI->implicit_operands()) {
+ if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
+ SDST = &MO;
+ break;
+ }
+ }
+ }
+
+ if (!SDST)
+ return false;
+
+ const unsigned SDSTReg = SDST->getReg();
+ auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
+ return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
+ };
+
+ // This assumes that there will be s_waitcnt lgkmcnt(0) or equivalent
+ // between any at risk SMEM and any SALU dependent on the SMEM results.
+ auto IsExpiredFn = [TII] (MachineInstr *MI, int) {
+ if (MI) {
+ if (TII->isSALU(*MI)) {
+ if (TII->isSOPP(*MI))
+ return false;
+ switch (MI->getOpcode()) {
+ case AMDGPU::S_SETVSKIP:
+ case AMDGPU::S_VERSION:
+ case AMDGPU::S_WAITCNT_VSCNT:
+ case AMDGPU::S_WAITCNT_VMCNT:
+ case AMDGPU::S_WAITCNT_EXPCNT:
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ return false;
+ default:
+ return true;
+ }
+ }
+ }
+ return false;
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
+ .addImm(0);
+ return true;
+}
+
+bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
+ if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
+ return false;
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
+ return false;
+
+ auto IsHazardFn = [TRI] (MachineInstr *I) {
+ if (SIInstrInfo::isVALU(*I))
+ return false;
+ return I->readsRegister(AMDGPU::EXEC, TRI);
+ };
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
+ if (!MI)
+ return false;
+ if (SIInstrInfo::isVALU(*MI)) {
+ if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
+ return true;
+ for (auto MO : MI->implicit_operands())
+ if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
+ return true;
+ }
+ if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+ (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
+ return true;
+ return false;
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0xfffe);
+ return true;
+}
+
+bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
+ if (!ST.hasLdsBranchVmemWARHazard())
+ return false;
+
+ auto IsHazardInst = [] (const MachineInstr *MI) {
+ if (SIInstrInfo::isDS(*MI))
+ return 1;
+ if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
+ return 2;
+ return 0;
+ };
+
+ auto InstType = IsHazardInst(MI);
+ if (!InstType)
+ return false;
+
+ auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
+ return I && (IsHazardInst(I) ||
+ (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+ I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+ !I->getOperand(1).getImm()));
+ };
+
+ auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
+ if (!I->isBranch())
+ return false;
+
+ auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
+ auto InstType2 = IsHazardInst(I);
+ return InstType2 && InstType != InstType2;
+ };
+
+ auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
+ if (!I)
+ return false;
+
+ auto InstType2 = IsHazardInst(I);
+ if (InstType == InstType2)
+ return true;
+
+ return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+ I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+ !I->getOperand(1).getImm();
+ };
+
+ return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
+ std::numeric_limits<int>::max();
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(0);
+
+ return true;
+}
+
+int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
+ int NSAtoVMEMWaitStates = 1;
+
+ if (!ST.hasNSAtoVMEMBug())
+ return 0;
+
+ if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
+ return 0;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
+ if (!Offset || (Offset->getImm() & 6) == 0)
+ return 0;
+
+ auto IsHazardFn = [TII] (MachineInstr *I) {
+ if (!SIInstrInfo::isMIMG(*I))
+ return false;
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
+ return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
+ TII->getInstSizeInBytes(*I) >= 16;
+ };
+
+ return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
+}
Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h?rev=359961&r1=359960&r2=359961&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h Fri May 3 21:30:57 2019
@@ -79,6 +79,12 @@ private:
int checkInlineAsmHazards(MachineInstr *IA);
int checkAnyInstHazards(MachineInstr *MI);
int checkReadM0Hazards(MachineInstr *SMovRel);
+ int checkNSAtoVMEMHazard(MachineInstr *MI);
+ bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
+ bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
+ bool fixVcmpxExecWARHazard(MachineInstr *MI);
+ bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
+
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.
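
All four fix* routines above rely on the same search contract:
::getWaitStatesSince walks backwards from the current instruction, gives up
as soon as IsExpiredFn reports the hazard window closed, and signals "no
hazard" with std::numeric_limits<int>::max(). A minimal standalone sketch of
that contract (a simplification under assumed semantics, with a stand-in
Inst type and a flat most-recent-first list instead of the real
MachineBasicBlock traversal -- not the in-tree API):

#include <functional>
#include <limits>
#include <vector>

struct Inst { int Opcode = 0; }; // stand-in for MachineInstr

// Scan prior instructions, most recent first. Stop early once the hazard
// window is closed; otherwise report how many wait states back the hazard
// sits, so the caller can insert a mitigation instruction.
int getWaitStatesSince(const std::vector<Inst *> &RecentFirst,
                       const std::function<bool(Inst *)> &IsHazard,
                       const std::function<bool(Inst *, int)> &IsExpired) {
  int WaitStates = 0;
  for (Inst *I : RecentFirst) {
    if (IsExpired(I, WaitStates))
      break;             // e.g. a VALU or "s_waitcnt 0" already intervened
    if (IsHazard(I))
      return WaitStates; // hazard found; caller emits v_nop etc.
    ++WaitStates;        // simplification: one wait state per instruction
  }
  return std::numeric_limits<int>::max(); // sentinel tested by the fix* code
}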
Added: llvm/trunk/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir?rev=359961&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir Fri May 3 21:30:57 2019
@@ -0,0 +1,276 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: hazard_lds_branch_buf
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: hazard_lds_branch_buf
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_buf_branch_lds
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: DS_READ_B32
+---
+name: hazard_buf_branch_lds
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_lds
+# GCN: bb.1:
+# GCN-NEXT: DS_READ_B32
+---
+name: no_hazard_lds_branch_lds
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_buf_branch_buf
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_buf_branch_buf
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_buf_fallthrough
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_branch_buf_fallthrough
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+
+ bb.1:
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_buf_samebb
+# GCN: DS_READ_B32
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_branch_buf_samebb
+body: |
+ bb.0:
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_buf_loop
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: DS_READ_B32
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: hazard_lds_branch_buf_loop
+body: |
+ bb.0:
+ successors: %bb.0
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.0
+...
+
+# GCN-LABEL: name: single_hazard_lds_branch_buf
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: single_hazard_lds_branch_buf
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_lds_buf
+# GCN: bb.1:
+# GCN-NEXT: DS_READ_B32
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_branch_lds_buf
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_buf_branch_buf
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_buf_branch_buf
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_vscnt_1_buf
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: hazard_lds_branch_vscnt_1_buf
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_WAITCNT_VSCNT undef $sgpr_null, 1
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_vscnt_0_buf
+# GCN: bb.1:
+# GCN-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_branch_vscnt_0_buf
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_WAITCNT_VSCNT undef $sgpr_null, 0
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_vscnt_s0_buf
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: hazard_lds_branch_vscnt_s0_buf
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_WAITCNT_VSCNT undef $sgpr0, 0
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_vscnt_0_branch_buf
+# GCN: bb.1:
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+---
+name: no_hazard_lds_vscnt_0_branch_buf
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_WAITCNT_VSCNT undef $sgpr_null, 0
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_global
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: GLOBAL_LOAD_DWORD
+---
+name: hazard_lds_branch_global
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_lds_branch_scratch
+# GCN: S_WAITCNT_VSCNT undef $sgpr_null, 0
+# GCN-NEXT: SCRATCH_LOAD_DWORD
+---
+name: hazard_lds_branch_scratch
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = SCRATCH_LOAD_DWORD undef $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_lds_branch_flat
+# GCN: bb.1:
+# GCN-NEXT: FLAT_LOAD_DWORD
+---
+name: no_hazard_lds_branch_flat
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $vgpr1 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ S_ENDPGM 0
+...
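
These cases reduce to the pairing rule in fixLdsBranchVmemWARHazard: DS (LDS)
counts as one access kind, VMEM and segment-specific FLAT (global and
scratch, but not plain flat) as the other, and the workaround fires only when
a branch separates accesses of different kinds with no intervening
"s_waitcnt_vscnt null, 0" (a nonzero count or a non-null counter register
does not clear it, as tested above). A hypothetical standalone restatement of
the pairing (names are mine, not in-tree code):

#include <cstdio>

enum class MemKind { None, Lds, Vmem }; // Vmem covers VMEM + global/scratch

// The WAR hazard needs an LDS access on one side of the branch and a VMEM
// access on the other, in either order; same-kind pairs are harmless.
static bool isLdsBranchVmemPair(MemKind Before, MemKind After) {
  return Before != MemKind::None && After != MemKind::None && Before != After;
}

int main() {
  std::printf("ds -> branch -> buffer: %d\n",
              (int)isLdsBranchVmemPair(MemKind::Lds, MemKind::Vmem)); // 1
  std::printf("ds -> branch -> ds:     %d\n",
              (int)isLdsBranchVmemPair(MemKind::Lds, MemKind::Lds));  // 0
  std::printf("buffer -> branch -> ds: %d\n",
              (int)isLdsBranchVmemPair(MemKind::Vmem, MemKind::Lds)); // 1
  return 0;
}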
Added: llvm/trunk/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir?rev=359961&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir Fri May 3 21:30:57 2019
@@ -0,0 +1,61 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: hazard_image_sample_d_buf_off6
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: S_NOP 0
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: hazard_image_sample_d_buf_off6
+body: |
+ bb.0:
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
+...
+
+# GCN-LABEL: name: no_hazard_image_sample_d_buf_off1
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: no_hazard_image_sample_d_buf_off1
+body: |
+ bb.0:
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, 0, implicit $exec
+...
+
+# GCN-LABEL: name: no_hazard_image_sample_d_buf_far
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: V_NOP_e32
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: no_hazard_image_sample_d_buf_far
+body: |
+ bb.0:
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ V_NOP_e32 implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
+...
+
+# Non-NSA
+# GCN-LABEL: name: no_hazard_image_sample_v4_v2_buf_off6
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: no_hazard_image_sample_v4_v2_buf_off6
+body: |
+ bb.0:
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
+...
+
+# Less than 4 dwords
+# GCN-LABEL: name: no_hazard_image_sample_v4_v3_buf_off6
+# GCN: IMAGE_SAMPLE
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFSET
+---
+name: no_hazard_image_sample_v4_v3_buf_off6
+body: |
+ bb.0:
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
+...
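
The offsets above line up with the "(Offset->getImm() & 6)" test in
checkNSAtoVMEMHazard: only bits 1 and 2 of the buffer's immediate offset arm
the workaround, and the prior MIMG instruction must be NSA-encoded and at
least 16 bytes long. A hypothetical standalone restatement of the offset mask
(names are illustrative, not in-tree code):

#include <cstdio>

// Same mask as checkNSAtoVMEMHazard: offsets with bit 1 or bit 2 set
// (2, 4, 6, ...) need the S_NOP; 0, 1 and 8 do not.
static bool offsetTriggersNSAHazard(unsigned OffsetImm) {
  return (OffsetImm & 6) != 0;
}

int main() {
  const unsigned Offs[] = {0, 1, 2, 4, 6, 8};
  for (unsigned Off : Offs)
    std::printf("offset %u -> %s\n", Off,
                offsetTriggersNSAHazard(Off) ? "hazard" : "no hazard");
  return 0;
}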
Added: llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir?rev=359961&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/smem-war-hazard.mir Fri May 3 21:30:57 2019
@@ -0,0 +1,193 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: hazard_smem_war
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_no_hazard
+# GCN: S_LOAD_DWORD_IMM
+# GCN-NEXT: S_ADD_U32
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_no_hazard
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr3 = S_ADD_U32 $sgpr4, $sgpr5, implicit-def $scc
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_related_clause
+# GCN: S_LOAD_DWORD_IMM
+# GCN: S_WAITCNT
+# GCN: S_ADD_U32
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_related_clause
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_WAITCNT 0
+ $sgpr3 = S_ADD_U32 $sgpr2, $sgpr4, implicit-def $scc
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_branch
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_branch
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr4, $vgpr0, $vgpr1
+ successors: %bb.1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_BRANCH %bb.1
+
+ bb.1:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_cbranch
+# GCN: S_AND_B64
+# GCN: S_LOAD_DWORD_IMM
+# GCN: S_CBRANCH_VCCZ
+# GCN-NOT: $sgpr_null = S_MOV_B32 0
+# GCN: V_CMP_EQ_F32
+# GCN: S_ENDPGM 0
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_cbranch
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+ successors: %bb.1, %bb.2
+ $vcc = S_AND_B64 $sgpr4_sgpr5, $sgpr4_sgpr5, implicit-def $scc
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
+
+ bb.1:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+ $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+
+ bb.2:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_cbranch_carry
+# GCN: S_AND_B64
+# GCN: S_LOAD_DWORD_IMM
+# GCN: S_CBRANCH_VCCZ
+# GCN-NOT: $sgpr_null = S_MOV_B32 0
+# GCN: V_CMP_EQ_F32
+# GCN-NEXT: S_ENDPGM 0
+# GCN-NOT: $sgpr_null = S_MOV_B32 0
+# GCN: V_CMP_EQ_F32
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_cbranch_carry
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+ successors: %bb.1, %bb.2
+ $vcc = S_AND_B64 $sgpr4_sgpr5, $sgpr4_sgpr5, implicit-def $scc
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_CBRANCH_VCCZ %bb.2, implicit killed $vcc
+
+ bb.1:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+ $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+
+ bb.2:
+ successors: %bb.3
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+ $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+
+ bb.3:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_backedge
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+# GCN: S_LOAD_DWORD_IMM
+---
+name: hazard_smem_war_backedge
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
+ successors: %bb.1
+ $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec
+
+ bb.1:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ S_BRANCH %bb.0
+...
+
+# GCN-LABEL: name: hazard_smem_war_impdef
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_CMP_EQ_F32
+---
+name: hazard_smem_war_impdef
+body: |
+ bb.0:
+ liveins: $vcc, $vgpr0
+ $sgpr0 = S_LOAD_DWORD_IMM $vcc, 0, 0, 0
+ V_CMP_EQ_F32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $exec
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_readlane
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_READLANE_B32
+---
+name: hazard_smem_war_readlane
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr3, $vgpr0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = V_READLANE_B32 $vgpr0, $sgpr3
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_smem_war_readfirstlane
+# GCN: S_LOAD_DWORD_IMM
+# GCN: $sgpr_null = S_MOV_B32 0
+# GCN-NEXT: V_READFIRSTLANE_B32
+---
+name: hazard_smem_war_readfirstlane
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr0
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+ $sgpr0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+ S_ENDPGM 0
+...
Added: llvm/trunk/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir?rev=359961&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir Fri May 3 21:30:57 2019
@@ -0,0 +1,164 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-skips,post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: hazard_vcmpx_smov_exec_lo
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: S_WAITCNT_DEPCTR 65534
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: hazard_vcmpx_smov_exec_lo
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 $exec_lo
+ SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_vcmpx_smov_exec
+# GCN: $sgpr0_sgpr1 = S_MOV_B64 $exec
+# GCN-NEXT: S_WAITCNT_DEPCTR 65534
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: hazard_vcmpx_smov_exec
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0_sgpr1 = S_MOV_B64 $exec
+ SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_vmov_exec_lo
+# GCN: $vgpr0 = V_MOV_B32_e32 $exec_lo, implicit $exec
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_vmov_exec_lo
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 $exec_lo, implicit $exec
+ SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_valu_impuse_exec
+# GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_valu_impuse_exec
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_imp
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: $vgpr0 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_imp
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 $exec_lo
+ $vgpr0 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+ SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_exp
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $vgpr0, 0, implicit $exec
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_smov_exec_lo_valu_writes_sgpr_exp
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 $exec_lo
+ $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $vgpr0, 0, implicit $exec
+ SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_depctr_fffe
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: S_WAITCNT_DEPCTR 65534
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_smov_exec_lo_depctr_fffe
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 $exec_lo
+ S_WAITCNT_DEPCTR 65534
+ SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_depctr_ffff
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN-NEXT: S_WAITCNT_DEPCTR 65535
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: no_hazard_vcmpx_smov_exec_lo_depctr_ffff
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 $exec_lo
+ S_WAITCNT_DEPCTR 65535
+ SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+# GCN-LABEL: name: hazard_vcmpx_smov_exec_lo_depctr_effe
+# GCN: $sgpr0 = S_MOV_B32 $exec_lo
+# GCN: S_WAITCNT_DEPCTR 65534
+# GCN-NEXT: V_CMPX_LE_F32_nosdst_e32
+---
+name: hazard_vcmpx_smov_exec_lo_depctr_effe
+body: |
+ bb.0:
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $sgpr0 = S_MOV_B32 $exec_lo
+ S_WAITCNT_DEPCTR 61438
+ SI_KILL_F32_COND_IMM_TERMINATOR $vgpr0, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_ENDPGM 0
+...
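
The three S_WAITCNT_DEPCTR cases above exercise the
"(Imm & 0xfffe) == 0xfffe" test in fixVcmpxExecWARHazard: 65534 (0xfffe) and
65535 (0xffff) both satisfy it, so an existing s_waitcnt_depctr with either
immediate already clears the hazard, while 61438 (0xeffe) does not and the
recognizer inserts a fresh "s_waitcnt_depctr 0xfffe". A hypothetical
standalone restatement (not in-tree code):

#include <cstdio>

// Same predicate fixVcmpxExecWARHazard uses to decide whether an existing
// S_WAITCNT_DEPCTR already neutralizes the VCMPX/EXEC WAR hazard.
static bool depctrClearsVcmpxHazard(unsigned Imm) {
  return (Imm & 0xfffe) == 0xfffe;
}

int main() {
  const unsigned Imms[] = {65534, 65535, 61438}; // 0xfffe, 0xffff, 0xeffe
  for (unsigned Imm : Imms)
    std::printf("s_waitcnt_depctr %#x -> %s\n", Imm,
                depctrClearsVcmpxHazard(Imm) ? "hazard cleared"
                                             : "still hazardous");
  return 0;
}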
Added: llvm/trunk/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir?rev=359961&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir Fri May 3 21:30:57 2019
@@ -0,0 +1,210 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: vmem_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_smem_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_LOAD_DWORD_IMM
+---
+name: vmem_smem_write_sgpr
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
+...
+# GCN-LABEL: name: vmem_snop_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_NOP
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_snop_write_sgpr
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_NOP 0
+ $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_valu_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: V_ADD_F32
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_valu_write_sgpr
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_swait0_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_WAITCNT
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_swait0_write_sgpr
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_WAITCNT 0
+ $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_swait_any_write_sgpr
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_WAITCNT
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_swait_any_write_sgpr
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_WAITCNT 1
+ $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_write_exec_impread
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_MOV_B64
+---
+name: vmem_write_exec_impread
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ $exec = S_MOV_B64 7
+...
+# GCN-LABEL: name: vmem_write_exec_expread
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B64
+---
+name: vmem_write_exec_expread
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $exec_lo, 0, 0, 0, 0, 0, implicit $exec
+ $exec = S_MOV_B64 7
+...
+# GCN-LABEL: name: ds_write_m0
+# GCN: DS_READ_B32
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: ds_write_m0
+body: |
+ bb.0:
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+ $m0 = S_MOV_B32 7
+...
+# GCN-LABEL: name: vmem_write_sgpr_fall_through
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr_fall_through
+body: |
+ bb.0:
+ successors: %bb.1
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+
+ bb.1:
+ $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_write_sgpr_branch
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_BRANCH
+# GCN: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr_branch
+body: |
+ bb.0:
+ successors: %bb.1
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_write_sgpr_branch_around
+# GCN: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_BRANCH
+# GCN: bb.2:
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr_branch_around
+body: |
+ bb.0:
+ successors: %bb.2
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.2
+ S_WAITCNT 0
+
+ bb.2:
+ $sgpr0 = S_MOV_B32 0
+...
+# GCN-LABEL: name: vmem_write_sgpr_branch_backedge
+# GCN: $vgpr0 = IMPLICIT_DEF
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name: vmem_write_sgpr_branch_backedge
+body: |
+ bb.0:
+ successors: %bb.1
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $sgpr4 = IMPLICIT_DEF
+ $vgpr0 = IMPLICIT_DEF
+ $sgpr0 = S_MOV_B32 0
+
+ bb.1:
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
+ S_BRANCH %bb.0
+...