[llvm] 4433f46 - [AMDGPU] Fix extra waitcnt being added with BUFFER_INVL2
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Tue May 11 13:19:00 PDT 2021
Author: Austin Kerbow
Date: 2021-05-11T13:17:33-07:00
New Revision: 4433f4601e8a8e36ddd9bb6f6ed394bda353b828
URL: https://github.com/llvm/llvm-project/commit/4433f4601e8a8e36ddd9bb6f6ed394bda353b828
DIFF: https://github.com/llvm/llvm-project/commit/4433f4601e8a8e36ddd9bb6f6ed394bda353b828.diff
LOG: [AMDGPU] Fix extra waitcnt being added with BUFFER_INVL2
The waitcnt pass would increment the number of vmem events for some buffer
invalidate instructions (such as BUFFER_INVL2) that were not otherwise handled
by the pass, which caused an extra waitcnt to be added.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D102252
Added:
Modified:
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 74705e47c3536..6ec17bc7f3fdb 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -332,6 +332,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
bits<4> elements = 0;
bits<1> has_sccb = 1;
bits<1> sccb_value = 0;
+ bits<1> IsBufferInv = 0;
}
class MUBUF_Real <MUBUF_Pseudo ps> :
@@ -374,7 +375,8 @@ class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
let mayLoad = 0;
let mayStore = 0;
- // Set everything to 0.
+ let IsBufferInv = 1;
+ // Set everything else to 0.
let offen = 0;
let idxen = 0;
let addr64 = 0;
@@ -2606,7 +2608,10 @@ let SubtargetPredicate = HasPackedD16VMem in {
def MUBUFInfoTable : GenericTable {
let FilterClass = "MUBUF_Pseudo";
let CppTypeName = "MUBUFInfo";
- let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"];
+ let Fields = [
+ "Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset",
+ "IsBufferInv"
+ ];
let PrimaryKey = ["Opcode"];
let PrimaryKeyName = "getMUBUFOpcodeHelper";
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index cead53a913d03..8017fb377525a 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -27,6 +27,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachinePostDominators.h"
@@ -945,8 +946,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
AMDGPU::Waitcnt Wait;
bool Modified = false;
- // See if this instruction has a forced S_WAITCNT VM.
- // TODO: Handle other cases of NeedsWaitcntVmBefore()
+ // FIXME: This should have already been handled by the memory legalizer.
+ // Removing this currently doesn't affect any lit tests, but we need to
+ // verify that nothing was relying on this. The number of buffer invalidates
+ // being handled here should not be expanded.
if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL ||
@@ -1317,12 +1320,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
if (FlatASCount > 1)
ScoreBrackets->setPendingFlat();
} else if (SIInstrInfo::isVMEM(Inst) &&
- // TODO: get a better carve out.
- Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1 &&
- Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_SC &&
- Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL &&
- Inst.getOpcode() != AMDGPU::BUFFER_GL0_INV &&
- Inst.getOpcode() != AMDGPU::BUFFER_GL1_INV) {
+ !llvm::AMDGPU::getMUBUFIsBufferInv(Inst.getOpcode())) {
if (!ST->hasVscnt())
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
else if ((Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst)) ||
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 9251c5f4242b9..f9bf98d06fb06 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -162,6 +162,7 @@ struct MUBUFInfo {
bool has_vaddr;
bool has_srsrc;
bool has_soffset;
+ bool IsBufferInv;
};
struct MTBUFInfo {
@@ -257,6 +258,11 @@ bool getMUBUFHasSoffset(unsigned Opc) {
return Info ? Info->has_soffset : false;
}
+bool getMUBUFIsBufferInv(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->IsBufferInv : false;
+}
+
bool getSMEMIsBuffer(unsigned Opc) {
const SMInfo *Info = getSMEMOpcodeHelper(Opc);
return Info ? Info->IsBuffer : false;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a60c0847cb879..79a50b4097b72 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -394,6 +394,9 @@ bool getMUBUFHasSrsrc(unsigned Opc);
LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);
+LLVM_READONLY
+bool getMUBUFIsBufferInv(unsigned Opc);
+
LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
index 0665f9647ac09..ada517ea78b34 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
@@ -192,6 +192,7 @@ body: |
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 3952
KILL $vgpr0
+...
# Combine preexisting waitcnt with wait added to the start of a non-entry function.
@@ -206,3 +207,28 @@ body: |
S_WAITCNT 0
S_ENDPGM 0
...
+
+# Verify that extra waitcnt are not added after buffer invalidate instructions.
+
+---
+name: test_waitcnt_preexisting_buffer_inv
+body: |
+ bb.0:
+ ; GFX9-LABEL: name: test_waitcnt_preexisting_buffer_inv
+ ; GFX9: S_WAITCNT 0
+ ; GFX9: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; GFX9: S_WAITCNT 3952
+ ; GFX9: BUFFER_INVL2 implicit $exec
+ ; GFX9: BUFFER_WBINVL1_VOL implicit $exec
+ ; GFX9: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX9: S_WAITCNT 112
+ ; GFX9: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX9: S_ENDPGM 0
+ $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
+ S_WAITCNT 3952
+ BUFFER_INVL2 implicit $exec
+ BUFFER_WBINVL1_VOL implicit $exec
+ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list