[llvm] [AMDGPU] Detect renamable kills when trying to form V_CMPX instructions. (PR #68293)

Thomas Symalla via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 5 02:55:54 PDT 2023


https://github.com/tsymalla updated https://github.com/llvm/llvm-project/pull/68293

>From fabb65858e705981d4816a30627f4229b3f0d0d5 Mon Sep 17 00:00:00 2001
From: Thomas Symalla <thomas.symalla at amd.com>
Date: Thu, 5 Oct 2023 11:20:56 +0200
Subject: [PATCH] [AMDGPU] Detect renamable kills when trying to form V_CMPX
 instructions.

During the SIOptimizeExecMasking pass, we try to form V_CMPX instructions by
detecting pairs of V_CMP and S_AND_SAVEEXEC instructions. Generally, we require
the input operand of the V_CMP, which becomes the input operand of the
to-be-formed V_CMPX, to be live. This is enforced by clearing the kill flags on
that operand after the V_CMPX has been generated.

However, if an instruction kills a larger register that overlaps said register
(for example, a register tuple such as $vgpr0_vgpr1 when the operand is
$vgpr1), the kill will not be detected by clearKillFlags. Since clearing the
kill flag on the larger register would possibly cause the other VGPRs it
contains to stay alive as well, we skip forming a V_CMPX instruction in that
case.
---
 .../Target/AMDGPU/SIOptimizeExecMasking.cpp   | 22 +++++++++++++---
 .../vcmp-saveexec-to-vcmpx-set-kill.mir       | 25 +++++++++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-set-kill.mir

diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 04c9a6457944c5f..65765d499b6902a 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -49,6 +49,7 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
   MachineInstr *findInstrBackwards(MachineInstr &Origin,
                                    std::function<bool(MachineInstr *)> Pred,
                                    ArrayRef<MCRegister> NonModifiableRegs,
+                                   MachineInstr *Terminator = nullptr,
                                    unsigned MaxInstructions = 20) const;
   bool optimizeExecSequence();
   void tryRecordVCmpxAndSaveexecSequence(MachineInstr &MI);
@@ -325,11 +326,12 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
 // Backwards-iterate from Origin (for n=MaxInstructions iterations) until either
 // the beginning of the BB is reached or Pred evaluates to true - which can be
 // an arbitrary condition based on the current MachineInstr, for instance an
-// target instruction. Breaks prematurely by returning nullptr if  one of the
+// target instruction. Breaks prematurely by returning nullptr if one of the
 // registers given in NonModifiableRegs is modified by the current instruction.
 MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
     MachineInstr &Origin, std::function<bool(MachineInstr *)> Pred,
-    ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions) const {
+    ArrayRef<MCRegister> NonModifiableRegs, MachineInstr *Terminator,
+    unsigned MaxInstructions) const {
   MachineBasicBlock::reverse_iterator A = Origin.getReverseIterator(),
                                       E = Origin.getParent()->rend();
   unsigned CurrentIteration = 0;
@@ -344,6 +346,19 @@ MachineInstr *SIOptimizeExecMasking::findInstrBackwards(
     for (MCRegister Reg : NonModifiableRegs) {
       if (A->modifiesRegister(Reg, TRI))
         return nullptr;
+
+      // Check for kills that appear after the terminator instruction, that
+      // would not be detected by clearKillFlags, since they will cause the
+      // register to be dead at a later place, causing the verifier to fail.
+      if (Terminator && A != Terminator && A->killsRegister(Reg, TRI)) {
+        for (MachineOperand &MO : A->operands()) {
+          if (MO.isReg() && MO.isKill()) {
+            Register Candidate = MO.getReg();
+            if (Candidate != Reg && TRI->regsOverlap(Candidate, Reg))
+              return nullptr;
+          }
+        }
+      }
     }
 
     ++CurrentIteration;
@@ -690,7 +705,8 @@ void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence(
     NonDefRegs.push_back(Src1->getReg());
 
   if (!findInstrBackwards(
-          MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs))
+          MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs,
+          VCmp))
     return;
 
   if (VCmp)
diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-set-kill.mir b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-set-kill.mir
new file mode 100644
index 000000000000000..272530efea076d6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx-set-kill.mir
@@ -0,0 +1,25 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX1100 %s
+
+---
+
+# GFX1100-LABEL: name: vcmp_saveexec_to_vcmpx_set_kill
+# GFX1100-NOT: V_CMPX
+name: vcmp_saveexec_to_vcmpx_set_kill
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr43, $sgpr44, $sgpr45, $sgpr55, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $vgpr40, $vgpr41, $vgpr76, $vgpr77, $vgpr78, $vgpr95, $vgpr109, $vgpr110, $vgpr111, $sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000000C, $sgpr52_sgpr53_sgpr54_sgpr55:0x0000000000000003, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $vgpr92_vgpr93_vgpr94_vgpr95:0x000000000000003F, $vgpr104_vgpr105_vgpr106_vgpr107:0x000000000000003F, $vgpr46_vgpr47:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000C, $vgpr72_vgpr73:0x000000000000000F, $vgpr74_vgpr75:0x000000000000000F, $vgpr88_vgpr89:0x000000000000000C, $vgpr90_vgpr91:0x0000000000000003, $vgpr124_vgpr125:0x000000000000000F, $vgpr126_vgpr127:0x000000000000000F
+
+  liveins: $vgpr0
+  renamable $vgpr1 = COPY $vgpr0, implicit $exec
+  renamable $sgpr0 = S_MOV_B32 0
+  renamable $sgpr1 = COPY renamable $sgpr0
+  renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  renamable $vcc_lo = V_CMP_EQ_U32_e64 0, $vgpr1, implicit $exec
+  renamable $sgpr2 = COPY renamable $sgpr0
+  renamable $sgpr3 = COPY renamable $sgpr0
+  BUFFER_STORE_DWORDX2_OFFSET_exact killed $vgpr0_vgpr1, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 344, 1, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8)
+  $sgpr0 = S_MOV_B32 $exec_lo
+  $sgpr0 = S_AND_SAVEEXEC_B32 $vcc_lo, implicit-def $exec, implicit-def $scc, implicit $exec
+  $exec_lo = S_MOV_B32_term killed renamable $sgpr0
+...



More information about the llvm-commits mailing list