[llvm] 6a7db0d - [AMDGPU] Skip some work on subtargets without scalar stores. NFC.

Wed Dec 15 04:47:33 PST 2021

Author: Jay Foad
Date: 2021-12-15T12:46:33Z
New Revision: 6a7db0dc8eefcfbf1cadde41cb40f6e16c0c242a

URL: https://github.com/llvm/llvm-project/commit/6a7db0dc8eefcfbf1cadde41cb40f6e16c0c242a
DIFF: https://github.com/llvm/llvm-project/commit/6a7db0dc8eefcfbf1cadde41cb40f6e16c0c242a.diff

LOG: [AMDGPU] Skip some work on subtargets without scalar stores. NFC.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c9d9dd1fb82c5..70c5a52c6b281 100644

--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1686,45 +1686,47 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
     }
   } while (Repeat);
 
-  SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
-
-  bool HaveScalarStores = false;
-
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      if (!HaveScalarStores && TII->isScalarStore(MI))
-        HaveScalarStores = true;
-
-      if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
-          MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
-        EndPgmBlocks.push_back(&MBB);
+  if (ST->hasScalarStores()) {
+    SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+    bool HaveScalarStores = false;
+
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        if (!HaveScalarStores && TII->isScalarStore(MI))
+          HaveScalarStores = true;
+
+        if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
+            MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
+          EndPgmBlocks.push_back(&MBB);
+      }
     }
-  }
 
-  if (HaveScalarStores) {
-    // If scalar writes are used, the cache must be flushed or else the next
-    // wave to reuse the same scratch memory can be clobbered.
-    //
-    // Insert s_dcache_wb at wave termination points if there were any scalar
-    // stores, and only if the cache hasn't already been flushed. This could be
-    // improved by looking across blocks for flushes in postdominating blocks
-    // from the stores but an explicitly requested flush is probably very rare.
-    for (MachineBasicBlock *MBB : EndPgmBlocks) {
-      bool SeenDCacheWB = false;
-
-      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
-           ++I) {
-        if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
-          SeenDCacheWB = true;
-        else if (TII->isScalarStore(*I))
-          SeenDCacheWB = false;
-
-        // FIXME: It would be better to insert this before a waitcnt if any.
-        if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
-             I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
-            !SeenDCacheWB) {
-          Modified = true;
-          BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+    if (HaveScalarStores) {
+      // If scalar writes are used, the cache must be flushed or else the next
+      // wave to reuse the same scratch memory can be clobbered.
+      //
+      // Insert s_dcache_wb at wave termination points if there were any scalar
+      // stores, and only if the cache hasn't already been flushed. This could
+      // be improved by looking across blocks for flushes in postdominating
+      // blocks from the stores but an explicitly requested flush is probably
+      // very rare.
+      for (MachineBasicBlock *MBB : EndPgmBlocks) {
+        bool SeenDCacheWB = false;
+
+        for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+             I != E; ++I) {
+          if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+            SeenDCacheWB = true;
+          else if (TII->isScalarStore(*I))
+            SeenDCacheWB = false;
+
+          // FIXME: It would be better to insert this before a waitcnt if any.
+          if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+               I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
+              !SeenDCacheWB) {
+            Modified = true;
+            BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+          }
         }
       }
     }