[llvm] [AMDGPU] Reduce use of continue in SIWholeQuadMode. NFC. (PR #93659)

Wed May 29 02:48:55 PDT 2024

https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/93659

(No description was provided for this pull request.)

>From d9d4ba67399ff0e7bb2604a51758d0ab3c5ff770 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 29 May 2024 10:42:40 +0100
Subject: [PATCH] [AMDGPU] Reduce use of continue in SIWholeQuadMode. NFC.

---
 llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 92 +++++++++-------------
 1 file changed, 36 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index ea8109bbee9ae..09dc1c781e2f3 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -278,11 +278,10 @@ LLVM_DUMP_METHOD void SIWholeQuadMode::printInfo() {
 
     for (const MachineInstr &MI : *BII.first) {
       auto III = Instructions.find(&MI);
-      if (III == Instructions.end())
-        continue;
-
-      dbgs() << "  " << MI << "    Needs = " << PrintState(III->second.Needs)
-             << ", OutNeeds = " << PrintState(III->second.OutNeeds) << '\n';
+      if (III != Instructions.end()) {
+        dbgs() << "  " << MI << "    Needs = " << PrintState(III->second.Needs)
+               << ", OutNeeds = " << PrintState(III->second.OutNeeds) << '\n';
+      }
     }
   }
 }
@@ -455,10 +454,8 @@ void SIWholeQuadMode::markOperand(const MachineInstr &MI,
     for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
       LiveRange &LR = LIS->getRegUnit(Unit);
       const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
-      if (!Value)
-        continue;
-
-      markDefs(MI, LR, Unit, AMDGPU::NoSubRegister, Flag, Worklist);
+      if (Value)
+        markDefs(MI, LR, Unit, AMDGPU::NoSubRegister, Flag, Worklist);
     }
   }
 }
@@ -499,19 +496,16 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
 
       if (TII->isWQM(Opcode)) {
         // If LOD is not supported WQM is not needed.
-        if (!ST->hasExtendedImageInsts())
-          continue;
         // Only generate implicit WQM if implicit derivatives are required.
         // This avoids inserting unintended WQM if a shader type without
         // implicit derivatives uses an image sampling instruction.
-        if (!HasImplicitDerivatives)
-          continue;
-        // Sampling instructions don't need to produce results for all pixels
-        // in a quad, they just require all inputs of a quad to have been
-        // computed for derivatives.
-        markInstructionUses(MI, StateWQM, Worklist);
-        GlobalFlags |= StateWQM;
-        continue;
+        if (ST->hasExtendedImageInsts() && HasImplicitDerivatives) {
+          // Sampling instructions don't need to produce results for all pixels
+          // in a quad, they just require all inputs of a quad to have been
+          // computed for derivatives.
+          markInstructionUses(MI, StateWQM, Worklist);
+          GlobalFlags |= StateWQM;
+        }
       } else if (Opcode == AMDGPU::WQM) {
         // The WQM intrinsic requires its output to have all the helper lanes
         // correct, so we need it to be in WQM.
@@ -520,7 +514,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
       } else if (Opcode == AMDGPU::SOFT_WQM) {
         LowerToCopyInstrs.push_back(&MI);
         SoftWQMInstrs.push_back(&MI);
-        continue;
       } else if (Opcode == AMDGPU::STRICT_WWM) {
         // The STRICT_WWM intrinsic doesn't make the same guarantee, and plus
         // it needs to be executed in WQM or Exact so that its copy doesn't
@@ -528,7 +521,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
         markInstructionUses(MI, StateStrictWWM, Worklist);
         GlobalFlags |= StateStrictWWM;
         LowerToMovInstrs.push_back(&MI);
-        continue;
       } else if (Opcode == AMDGPU::STRICT_WQM ||
                  TII->isDualSourceBlendEXP(MI)) {
         // STRICT_WQM is similar to STRICTWWM, but instead of enabling all
@@ -551,7 +543,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
           GlobalFlags |= StateExact;
           III.Disabled = StateWQM | StateStrict;
         }
-        continue;
       } else if (Opcode == AMDGPU::LDS_PARAM_LOAD ||
                  Opcode == AMDGPU::DS_PARAM_LOAD ||
                  Opcode == AMDGPU::LDS_DIRECT_LOAD ||
@@ -561,7 +552,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
         InstrInfo &II = Instructions[&MI];
         II.Needs |= StateStrictWQM;
         GlobalFlags |= StateStrictWQM;
-        continue;
       } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
                  Opcode == AMDGPU::V_SET_INACTIVE_B64) {
         III.Disabled = StateStrict;
@@ -574,7 +564,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
           }
         }
         SetInactiveInstrs.push_back(&MI);
-        continue;
       } else if (TII->isDisableWQM(MI)) {
         BBI.Needs |= StateExact;
         if (!(BBI.InNeeds & StateExact)) {
@@ -583,40 +572,33 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
         }
         GlobalFlags |= StateExact;
         III.Disabled = StateWQM | StateStrict;
-        continue;
-      } else {
-        if (Opcode == AMDGPU::SI_PS_LIVE || Opcode == AMDGPU::SI_LIVE_MASK) {
-          LiveMaskQueries.push_back(&MI);
-        } else if (Opcode == AMDGPU::SI_KILL_I1_TERMINATOR ||
-                   Opcode == AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR ||
-                   Opcode == AMDGPU::SI_DEMOTE_I1) {
-          KillInstrs.push_back(&MI);
-          BBI.NeedsLowering = true;
-        } else if (WQMOutputs) {
-          // The function is in machine SSA form, which means that physical
-          // VGPRs correspond to shader inputs and outputs. Inputs are
-          // only used, outputs are only defined.
-          // FIXME: is this still valid?
-          for (const MachineOperand &MO : MI.defs()) {
-            if (!MO.isReg())
-              continue;
-
-            Register Reg = MO.getReg();
-
-            if (!Reg.isVirtual() &&
-                TRI->hasVectorRegisters(TRI->getPhysRegBaseClass(Reg))) {
-              Flags = StateWQM;
-              break;
-            }
+      } else if (Opcode == AMDGPU::SI_PS_LIVE ||
+                 Opcode == AMDGPU::SI_LIVE_MASK) {
+        LiveMaskQueries.push_back(&MI);
+      } else if (Opcode == AMDGPU::SI_KILL_I1_TERMINATOR ||
+                 Opcode == AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR ||
+                 Opcode == AMDGPU::SI_DEMOTE_I1) {
+        KillInstrs.push_back(&MI);
+        BBI.NeedsLowering = true;
+      } else if (WQMOutputs) {
+        // The function is in machine SSA form, which means that physical
+        // VGPRs correspond to shader inputs and outputs. Inputs are
+        // only used, outputs are only defined.
+        // FIXME: is this still valid?
+        for (const MachineOperand &MO : MI.defs()) {
+          Register Reg = MO.getReg();
+          if (Reg.isPhysical() &&
+              TRI->hasVectorRegisters(TRI->getPhysRegBaseClass(Reg))) {
+            Flags = StateWQM;
+            break;
           }
         }
-
-        if (!Flags)
-          continue;
       }
 
-      markInstruction(MI, Flags, Worklist);
-      GlobalFlags |= Flags;
+      if (Flags) {
+        markInstruction(MI, Flags, Worklist);
+        GlobalFlags |= Flags;
+      }
     }
   }
 
@@ -1568,8 +1550,6 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
     case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
       SplitPoint = lowerKillF32(*MBB, *MI);
       break;
-    default:
-      continue;
     }
     if (SplitPoint)
       splitBlock(MBB, SplitPoint);