[llvm] [AMDGPU] Fix phi injection in si-i1-lowering (PR #179267)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 2 07:53:03 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: None (idubinov)

<details>
<summary>Changes</summary>

PHI i1 lowering inserts a scalar math sequence (in the buildMergeLaneMasks function) that uses masks to merge data from different sources. The sequence must be placed after the definitions of all of its arguments, and before SCC (the scalar-operation flag) is set; otherwise the state of SCC would be changed, which may affect branch instructions.

This patch changes where the merge-masks sequence is injected.

If the SCC definition comes before the definition of the last argument of the sequence, the code tries to reorder the instructions. If that is not possible, the SCC state is spilled into a new register before the merge-masks sequence and restored after it.

---
Full diff: https://github.com/llvm/llvm-project/pull/179267.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp (+149-24) 
- (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.h (+5-2) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir (+15-15) 
- (added) llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir (+164) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 5b329f46930ca..ef675eb958958 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -25,6 +25,8 @@
 #include "AMDGPU.h"
 #include "llvm/CodeGen/MachineSSAUpdater.h"
 #include "llvm/InitializePasses.h"
+#include <iterator>
+#include <optional>
 
 #define DEBUG_TYPE "si-i1-copies"
 
@@ -389,8 +391,7 @@ insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
 
 #ifndef NDEBUG
 static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
-                                const MachineRegisterInfo &MRI,
-                                Register Reg) {
+                                const MachineRegisterInfo &MRI, Register Reg) {
   unsigned Size = TRI.getRegSizeInBits(Reg, MRI);
   return Size == 1 || Size == 32;
 }
@@ -447,9 +448,10 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
 
   ST = &MF->getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
-  IsWave32 = ST->isWave32();
+  WavefrontSize = ST->getWavefrontSize();
+  assert((WavefrontSize == 32 || WavefrontSize == 64));
 
-  if (IsWave32) {
+  if (WavefrontSize == 32) {
     ExecReg = AMDGPU::EXEC_LO;
     MovOp = AMDGPU::S_MOV_B32;
     AndOp = AMDGPU::S_AND_B32;
@@ -457,6 +459,8 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
     XorOp = AMDGPU::S_XOR_B32;
     AndN2Op = AMDGPU::S_ANDN2_B32;
     OrN2Op = AMDGPU::S_ORN2_B32;
+    CSelectOp = AMDGPU::S_CSELECT_B32;
+    CmpLGOp = AMDGPU::S_CMP_LG_U32;
   } else {
     ExecReg = AMDGPU::EXEC;
     MovOp = AMDGPU::S_MOV_B64;
@@ -465,9 +469,148 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
     XorOp = AMDGPU::S_XOR_B64;
     AndN2Op = AMDGPU::S_ANDN2_B64;
     OrN2Op = AMDGPU::S_ORN2_B64;
+    CSelectOp = AMDGPU::S_CSELECT_B64;
+    CmpLGOp = AMDGPU::S_CMP_LG_U64;
   }
 }
 
+static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
+  Def = false;
+  Use = false;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
+      if (MO.isUse())
+        Use = true;
+      else
+        Def = true;
+    }
+  }
+}
+
+/// Move instruction to a new position inside the same MBB, if there is no
+/// operand's dependencies. Change the InstrToMovePos after the moved
+/// instruction. returns true if instruction moved, false if not.
+bool moveIfPossible(MachineBasicBlock &MBB,
+                    llvm::MachineBasicBlock::iterator &InstrToMovePos,
+                    const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
+  MachineInstr &MI = *InstrToMovePos;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    // Check if any operands are defined between current position and target
+    if (!MO.isReg())
+      continue;
+    if (MO.isUse()) {
+      for (auto I = std::next(MI.getIterator()); I != MoveAfterPos; ++I) {
+        for (const MachineOperand &MOI : I->operands())
+          if (MOI.isReg() && MOI.isDef() && MOI.getReg() == MO.getReg())
+            return false;
+      }
+    }
+
+    // Check if MI defines any register used before InsertPos
+    if (MO.isDef()) {
+      for (auto I = MoveAfterPos; I != MI.getIterator(); --I) {
+        for (const MachineOperand &MOI : I->operands())
+          if (MOI.isReg() && MOI.isUse() && MOI.getReg() == MO.getReg())
+            return false;
+      }
+    }
+  }
+
+  MI.removeFromParent();
+  MBB.insertAfter(MoveAfterPos, &MI);
+  InstrToMovePos = MoveAfterPos;
+  InstrToMovePos++;
+  return true;
+}
+
+/// Insert mask calculation procedure.
+/// Finds a place for insertion, reorganize instruction if needed,
+/// store/restore SCC register if needed.
+void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
+  MachineBasicBlock &MBB = *Incoming.Block;
+  auto FirstTerminator = MBB.getFirstTerminator();
+
+  bool TerminatorsUseSCC = false;
+  for (auto I = FirstTerminator, E = MBB.end(); I != E; ++I) {
+    bool DefsSCC;
+    instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
+    if (TerminatorsUseSCC || DefsSCC)
+      break;
+  }
+
+  if (!TerminatorsUseSCC) {
+    buildMergeLaneMasks(MBB, FirstTerminator, {}, Incoming.UpdatedReg, DstReg,
+                        Incoming.Reg);
+    return;
+  }
+
+  std::optional<llvm::MachineBasicBlock::iterator> sccDefPos, curRegDefPos;
+  for (auto I = FirstTerminator; I != MBB.begin(); --I) {
+    const llvm::iterator_range<llvm::MachineOperand *> IMO = I->operands();
+
+    for (const auto &MO : IMO) {
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+
+      Register R = MO.getReg();
+
+      if (R == Incoming.Reg) {
+        curRegDefPos = I;
+      }
+
+      if (R == AMDGPU::SCC) {
+        sccDefPos = I;
+        break;
+      }
+    }
+
+    if (sccDefPos)
+      break;
+  }
+
+  assert(sccDefPos);
+
+  if (!curRegDefPos) {
+    /// SCC define is after any of operator defines
+    buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+                        Incoming.Reg);
+    return;
+  }
+
+  assert(curRegDefPos && std::distance(MBB.begin(), curRegDefPos.value()) >
+                             std::distance(MBB.begin(), sccDefPos.value()));
+
+  /// Try to move the SCC def operator after the latest operator
+  if (moveIfPossible(MBB, sccDefPos.value(), curRegDefPos.value())) {
+    buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+                        Incoming.Reg);
+    return;
+  }
+
+  /// if not possible: store/restore SCC register
+  curRegDefPos.value()++;
+
+  /// store SCC
+  Register SavedSCC = MRI->createVirtualRegister(
+      WavefrontSize == 32 ? &AMDGPU::SReg_32RegClass
+                          : &AMDGPU::SReg_64RegClass);
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CSelectOp), SavedSCC)
+      .addImm(1)
+      .addImm(0);
+
+  buildMergeLaneMasks(MBB, curRegDefPos.value(), {}, Incoming.UpdatedReg,
+                      DstReg, Incoming.Reg);
+
+  /// restore SCC
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CmpLGOp))
+      .addReg(SavedSCC)
+      .addImm(0)
+      .addReg(AMDGPU::SCC, RegState::ImplicitDefine);
+  return;
+}
+
 bool PhiLoweringHelper::lowerPhis() {
   MachineSSAUpdater SSAUpdater(*MF);
   LoopFinder LF(*DT, *PDT);
@@ -537,9 +680,7 @@ bool PhiLoweringHelper::lowerPhis() {
 
       for (auto &Incoming : Incomings) {
         MachineBasicBlock &IMBB = *Incoming.Block;
-        buildMergeLaneMasks(
-            IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
-            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
+        insertMask(Incoming, SSAUpdater.GetValueInMiddleOfBlock(&IMBB));
       }
     } else {
       // The phi is not observed from outside a loop. Use a more accurate
@@ -566,9 +707,7 @@ bool PhiLoweringHelper::lowerPhis() {
           continue;
 
         MachineBasicBlock &IMBB = *Incoming.Block;
-        buildMergeLaneMasks(
-            IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
-            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
+        insertMask(Incoming, SSAUpdater.GetValueInMiddleOfBlock(&IMBB));
       }
     }
 
@@ -696,20 +835,6 @@ bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
   return false;
 }
 
-static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
-  Def = false;
-  Use = false;
-
-  for (const MachineOperand &MO : MI.operands()) {
-    if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
-      if (MO.isUse())
-        Use = true;
-      else
-        Def = true;
-    }
-  }
-}
-
 /// Return a point at the end of the given \p MBB to insert SALU instructions
 /// for lane mask calculation. Take terminators and SCC into account.
 MachineBasicBlock::iterator
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index fd90328c2b926..bf96a07a611f4 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -20,7 +20,7 @@
 
 namespace llvm {
 
-/// Incoming for lane maks phi as machine instruction, incoming register \p Reg
+/// Incoming for lane mask phi as machine instruction, incoming register \p Reg
 /// and incoming block \p Block are taken from machine instruction.
 /// \p UpdatedReg (if valid) is \p Reg lane mask merged with another lane mask.
 struct Incoming {
@@ -42,7 +42,7 @@ class PhiLoweringHelper {
   virtual ~PhiLoweringHelper() = default;
 
 protected:
-  bool IsWave32 = false;
+  unsigned WavefrontSize;
   MachineFunction *MF = nullptr;
   MachineDominatorTree *DT = nullptr;
   MachinePostDominatorTree *PDT = nullptr;
@@ -62,12 +62,15 @@ class PhiLoweringHelper {
   unsigned XorOp;
   unsigned AndN2Op;
   unsigned OrN2Op;
+  unsigned CSelectOp;
+  unsigned CmpLGOp;
 
 public:
   bool lowerPhis();
   bool isConstantLaneMask(Register Reg, bool &Val) const;
   MachineBasicBlock::iterator
   getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+  void insertMask(const Incoming &Incoming, Register DstReg);
 
   void initializeLaneMaskRegisterAttributes(Register LaneMask) {
     LaneMaskRegAttrs = MRI->getVRegAttrs(LaneMask);
diff --git a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
index 028d511c6bf86..50219b38e636a 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
@@ -16,7 +16,7 @@ machineFunctionInfo:
 body:             |
   ; CHECK-LABEL: name: recursive_vreg_1_phi
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x{{[0-9a-fA-F]+}})
   ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
@@ -47,23 +47,23 @@ body:             |
   ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
   ; CHECK-NEXT:   [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-LABEL: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.3(0x{{[0-9a-fA-F]+}})
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
-  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
-  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
-  ; CHECK-NEXT:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; CHECK-NEXT:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
-  ; CHECK-NEXT:   S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
-  ; CHECK-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
-  ; CHECK-NEXT:   [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK-DAG:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
+  ; CHECK-DAG:   [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
+  ; CHECK-DAG:   [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
+  ; CHECK-DAG:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK-DAG:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+  ; CHECK-DAG:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
+  ; CHECK-DAG:   S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
+  ; CHECK-DAG:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+  ; CHECK-DAG:   [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.3, implicit $scc
   ; CHECK-NEXT:   S_BRANCH %bb.2
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-LABEL: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x{{[0-9a-fA-F]+}})
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
   ; CHECK-NEXT:   [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6
@@ -83,7 +83,7 @@ body:             |
   ; CHECK-NEXT:   [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   successors: %bb.4(0x00000000), %bb.1(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2
   ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
new file mode 100644
index 0000000000000..39fc7e34505d1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
@@ -0,0 +1,164 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-i1-copies -o - %s | FileCheck -check-prefixes=GCN %s
+
+---
+name:              phi_with_dependencies1
+# SCC def instruction (S_CMP_LG_U32) is below PHI dependancy ($17)
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: phi_with_dependencies1
+  ; GCN-LABEL: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}
+  ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-LABEL: bb.1:
+  ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
+  ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
+  ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
+  ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-LABEL: bb.2:
+  ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
+  ; GCN-NEXT: S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x80000000)
+    %0:sreg_32 = S_MOV_B32 1
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2, %bb.1
+    %13:sreg_32 = S_MOV_B32 2
+    %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %17:sreg_64 = V_CMP_EQ_U32_e64 killed %16:vgpr_32, 1, implicit $exec
+    %1:vreg_1 = COPY %17:sreg_64
+    S_CMP_LG_U32 %0:sreg_32, killed %13:sreg_32, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+  ; predecessors: %bb.1
+    %2:vreg_1 = PHI %1:vreg_1, %bb.1
+    %19:sreg_64_xexec = COPY %2:vreg_1
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: phi_with_dependencies2
+---
+name:              phi_with_dependencies2
+# SCC def instruction (S_CMP_LG_U32) is above PHI dependancy ($17) definition and can be lowered
+tracksRegLiveness: true
+body:             |
+
+  ; GCN-LABEL: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}  
+  ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+
+  ; GCN-LABEL: bb.1:
+  ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
+  ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GCN-NEXT: [[V_MOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT: [[V_CMP_EQ:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV]], 1, implicit $exec
+  ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 %9, $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_OR:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
+  ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT: S_BRANCH %bb.2
+
+  ; GCN-LABEL: bb.2:
+  ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
+  ; GCN-NEXT: S_ENDPGM 0
+
+  bb.0:
+    successors: %bb.1(0x80000000); %bb.1(100.00%)
+    %0:sreg_32 = S_MOV_B32 1
+    S_BRANCH %bb.1
+
+  bb.1:
+  ; predecessors: %bb.0, %bb.1
+    successors: %bb.2, %bb.1
+
+    %13:sreg_32 = S_MOV_B32 2
+    S_CMP_LG_U32 %0:sreg_32, killed %13:sreg_32, implicit-def $scc
+    %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %17:sreg_64 = V_CMP_EQ_U32_e64 killed %16:vgpr_32, 1, implicit $exec
+    %1:vreg_1 = COPY %17:sreg_64
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+  ; predecessors: %bb.1
+    %2:vreg_1 = PHI %1:vreg_1, %bb.1
+    %19:sreg_64_xexec = COPY %2:vreg_1
+    S_ENDPGM 0
+...
+
+
+# GCN-LABEL: name: phi_with_dependencies3
+---
+name:              phi_with_dependencies3
+# SCC def instruction (V_MOV_B32_e32) is above PHI dependancy ($17) definition and cannot be lowered
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}
+  ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-LABEL: bb.1:
+  ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+    ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
+  ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
+  ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
+  ; GCN-NEXT: [[HIDE_CSS:%[0-9]+]]:sreg_64 = S_CSELECT_B64 1, 0, implicit $scc
+  ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
+  ; GCN-NEXT: S_CMP_LG_U64 [[HIDE_CSS]], 0, implicit-def $scc, implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-LABEL: bb.2:
+  ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
+  ; GCN-NEXT: S_ENDPGM 0
+
+  bb.0:
+    successors: %bb.1(0x80000000); %bb.1(100.00%)
+    %0:sreg_32 = S_MOV_B32 1
+    S_BRANCH %bb.1
+
+  bb.1:
+  ; predecessors: %bb.0, %bb.1
+    successors: %bb.2(0x40000000), %bb.1(0x40000000); %bb.2(50.00%), %bb.1(50.00%)
+
+    %1:sreg_32 = S_MOV_B32 2
+    S_CMP_LG_U32 %0:sreg_32, killed %1:sreg_32, implicit-def $scc
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec, implicit-def $scc
+    %4:sreg_64 = V_CMP_EQ_U32_e64 killed %2:vgpr_32, 1, implicit $exec
+    %5:vreg_1 = COPY %4:sreg_64
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+  ; predecessors: %bb.1
+    %6:vreg_1 = PHI %5:vreg_1, %bb.1
+    %7:sreg_64_xexec = COPY %6:vreg_1
+    S_ENDPGM 0
+
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/179267


More information about the llvm-commits mailing list