[llvm] r283622 - AMDGPU/SI: Handle div_fmas hazard in GCNHazardRecognizer
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 7 16:42:48 PDT 2016
Author: tstellar
Date: Fri Oct 7 18:42:48 2016
New Revision: 283622
URL: http://llvm.org/viewvc/llvm-project?rev=283622&view=rev
Log:
AMDGPU/SI: Handle div_fmas hazard in GCNHazardRecognizer
Reviewers: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D25250
Added:
llvm/trunk/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp?rev=283622&r1=283621&r2=283622&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Fri Oct 7 18:42:48 2016
@@ -38,6 +38,10 @@ void GCNHazardRecognizer::EmitInstructio
CurrCycleInstr = MI;
}
+static bool isDivFMas(unsigned Opcode) {
+ return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
+}
+
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
@@ -51,6 +55,9 @@ GCNHazardRecognizer::getHazardType(SUnit
if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
return NoopHazard;
+ if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
+ return NoopHazard;
+
return NoHazard;
}
@@ -68,6 +75,9 @@ unsigned GCNHazardRecognizer::PreEmitNoo
if (SIInstrInfo::isDPP(*MI))
return std::max(0, checkDPPHazards(MI));
+ if (isDivFMas(MI->getOpcode()))
+ return std::max(0, checkDivFMasHazards(MI));
+
return 0;
}
@@ -262,3 +272,15 @@ int GCNHazardRecognizer::checkDPPHazards
return WaitStatesNeeded;
}
+
+int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ // v_div_fmas requires 4 wait states after a write to vcc from a VALU
+ // instruction.
+ const int DivFMasWaitStates = 4;
+ auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+ int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
+
+ return DivFMasWaitStates - WaitStatesNeeded;
+}
Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h?rev=283622&r1=283621&r2=283622&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h Fri Oct 7 18:42:48 2016
@@ -43,6 +43,7 @@ class GCNHazardRecognizer final : public
int checkSMRDHazards(MachineInstr *SMRD);
int checkVMEMHazards(MachineInstr* VMEM);
int checkDPPHazards(MachineInstr *DPP);
+ int checkDivFMasHazards(MachineInstr *DivFMas);
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.
Added: llvm/trunk/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir?rev=283622&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir (added)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir Fri Oct 7 18:42:48 2016
@@ -0,0 +1,60 @@
+# RUN: llc -march=amdgcn -run-pass post-RA-hazard-rec %s -o - | FileCheck %s
+
+# CHECK-LABEL: bb.0:
+# CHECK: S_MOV_B64
+# CHECK-NOT: S_NOP
+# CHECK: V_DIV_FMAS
+
+# CHECK-LABEL: bb.1:
+# CHECK: V_CMP_EQ_I32
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: V_DIV_FMAS_F32
+
+# CHECK-LABEL: bb.2:
+# CHECK: V_CMP_EQ_I32
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: V_DIV_FMAS_F32
+
+# CHECK-LABEL: bb.3:
+# CHECK: V_DIV_SCALE_F32
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: S_NOP
+# CHECK: V_DIV_FMAS_F32
+--- |
+ define void @test0() { ret void }
+...
+---
+name: test0
+
+body: |
+ bb.0:
+ successors: %bb.1
+ %vcc = S_MOV_B64 0
+ %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2
+ implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec
+ %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ %vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec
+ %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ %vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec
+ %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
+ S_ENDPGM
+...
More information about the llvm-commits mailing list