[llvm] 6a7db0d - [AMDGPU] Skip some work on subtargets without scalar stores. NFC.
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 15 04:47:33 PST 2021
Author: Jay Foad
Date: 2021-12-15T12:46:33Z
New Revision: 6a7db0dc8eefcfbf1cadde41cb40f6e16c0c242a
URL: https://github.com/llvm/llvm-project/commit/6a7db0dc8eefcfbf1cadde41cb40f6e16c0c242a
DIFF: https://github.com/llvm/llvm-project/commit/6a7db0dc8eefcfbf1cadde41cb40f6e16c0c242a.diff
LOG: [AMDGPU] Skip some work on subtargets without scalar stores. NFC.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c9d9dd1fb82c5..70c5a52c6b281 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1686,45 +1686,47 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
}
} while (Repeat);
- SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
-
- bool HaveScalarStores = false;
-
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (!HaveScalarStores && TII->isScalarStore(MI))
- HaveScalarStores = true;
-
- if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
- MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
- EndPgmBlocks.push_back(&MBB);
+ if (ST->hasScalarStores()) {
+ SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+ bool HaveScalarStores = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!HaveScalarStores && TII->isScalarStore(MI))
+ HaveScalarStores = true;
+
+ if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
+ EndPgmBlocks.push_back(&MBB);
+ }
}
- }
- if (HaveScalarStores) {
- // If scalar writes are used, the cache must be flushed or else the next
- // wave to reuse the same scratch memory can be clobbered.
- //
- // Insert s_dcache_wb at wave termination points if there were any scalar
- // stores, and only if the cache hasn't already been flushed. This could be
- // improved by looking across blocks for flushes in postdominating blocks
- // from the stores but an explicitly requested flush is probably very rare.
- for (MachineBasicBlock *MBB : EndPgmBlocks) {
- bool SeenDCacheWB = false;
-
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
- SeenDCacheWB = true;
- else if (TII->isScalarStore(*I))
- SeenDCacheWB = false;
-
- // FIXME: It would be better to insert this before a waitcnt if any.
- if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
- !SeenDCacheWB) {
- Modified = true;
- BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+ if (HaveScalarStores) {
+ // If scalar writes are used, the cache must be flushed or else the next
+ // wave to reuse the same scratch memory can be clobbered.
+ //
+ // Insert s_dcache_wb at wave termination points if there were any scalar
+ // stores, and only if the cache hasn't already been flushed. This could
+ // be improved by looking across blocks for flushes in postdominating
+ // blocks from the stores but an explicitly requested flush is probably
+ // very rare.
+ for (MachineBasicBlock *MBB : EndPgmBlocks) {
+ bool SeenDCacheWB = false;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+ SeenDCacheWB = true;
+ else if (TII->isScalarStore(*I))
+ SeenDCacheWB = false;
+
+ // FIXME: It would be better to insert this before a waitcnt if any.
+ if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
+ !SeenDCacheWB) {
+ Modified = true;
+ BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+ }
}
}
}
More information about the llvm-commits mailing list