[llvm] [AMDGPU] Detect renamable kills when trying to form V_CMPX instructions. (PR #68293)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 5 02:28:21 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

<details>
<summary>Changes</summary>

During the SIOptimizeExecMasking pass, we try to form V_CMPX instructions by detecting S_AND_SAVEEXEC and V_MOV instructions. Generally, we require the input operand of the V_MOV, which is the input operand to the to-be-formed V_CMPX, to be alive. This is forced by clearing the kill flags on the operand after V_CMPX has been generated.

However, if we have a kill of a renamable register that contains said register, this will not be detected by clearKillFlags. Since clearing the kill flags will possibly cause the other VGPRs to stay alive as well, we skip forming a V_CMPX instruction in that case.

---
Full diff: https://github.com/llvm/llvm-project/pull/68293.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp (+16-2) 
- (added) llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-renamable-kill.mir (+25) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 04c9a6457944c5f..e3082f7d8aa1742 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -49,6 +49,7 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
   MachineInstr *findInstrBackwards(MachineInstr &Origin,
                                    std::function<bool(MachineInstr *)> Pred,
                                    ArrayRef<MCRegister> NonModifiableRegs,
+                                   MachineInstr *Terminator = nullptr,
                                    unsigned MaxInstructions = 20) const;
   bool optimizeExecSequence();
   void tryRecordVCmpxAndSaveexecSequence(MachineInstr &MI);
@@ -329,7 +330,8 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
 // registers given in NonModifiableRegs is modified by the current instruction.
 MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
     MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred,
-    ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const {
+    ArrayRef<MCRegister> NonModifiableRegs, MachineInstr *Terminator,
+    unsigned MaxInstructions) const {
   MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(),
                                       E = Origin.getParent()->rend();
   unsigned CurrentIteration = 0;
@@ -344,6 +346,17 @@ MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
     for (MCRegister Reg : NonModifiableRegs) {
       if (A->modifiesRegister(Reg, TRI))
         return nullptr;
+
+      // Check for kills that appear after the terminator instruction, that
+      // would not be detected by clearKillFlags, since they will cause the
+      // register to be dead at a later place, causing the verifier to fail.
+      if (Terminator && A != Terminator && A->killsRegister(Reg, TRI)) {
+        for (MachineOperand &MO : A->operands()) {
+          if (MO.isReg() && MO.isKill() && MO.isRenamable() &&
+              TRI->regsOverlap(MO.getReg(), Reg))
+            return nullptr;
+        }
+      }
     }
 
     ++CurrentIteration;
@@ -690,7 +703,8 @@ void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence(
     NonDefRegs.push_back(Src1->getReg());
 
   if (!findInstrBackwards(
-          MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs))
+          MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs,
+          VCmp))
     return;
 
   if (VCmp)
diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-renamable-kill.mir b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-renamable-kill.mir
new file mode 100644
index 000000000000000..d4d0cb12f217544
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-renamable-kill.mir
@@ -0,0 +1,25 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX1100 %s
+
+---
+
+# GFX1100-LABEL: name: vcmp_saveexec_to_vcmpx_kill_between
+# GFX1100-NOT: V_CMPX
+name: vcmp_saveexec_to_vcmpx_kill_between
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F
+
+  liveins: $vgpr0
+  renamable $vgpr1 = COPY $vgpr0, implicit $exec
+  renamable $sgpr0 = S_MOV_B32 0
+  renamable $sgpr1 = COPY renamable $sgpr0
+  renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  renamable $vcc_lo = V_CMP_EQ_U32_e64 0, $vgpr1, implicit $exec
+  renamable $sgpr2 = COPY renamable $sgpr0
+  renamable $sgpr3 = COPY renamable $sgpr0
+  BUFFER_STORE_DWORDX2_OFFSET_exact killed renamable $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 344, 1, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8)
+  $sgpr0 = S_MOV_B32 $exec_lo
+  $sgpr0 = S_AND_SAVEEXEC_B32 $vcc_lo, implicit-def $exec, implicit-def $scc, implicit $exec
+  $exec_lo = S_MOV_B32_term killed renamable $sgpr0
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/68293


More information about the llvm-commits mailing list