[llvm] [AMDGPU] Fix phi injection in si-i1-lowering (PR #179267)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 3 08:28:21 PST 2026


https://github.com/idubinov updated https://github.com/llvm/llvm-project/pull/179267

>From 269afdf2aa1d52d415fe4bb7a87ebc99d3a60039 Mon Sep 17 00:00:00 2001
From: idubinov <igor.dubinov at amd.com>
Date: Mon, 2 Feb 2026 08:41:31 -0600
Subject: [PATCH 1/6] Fix phi injection in si-i1-lowering

---
 llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp    | 173 +++++++++++++++---
 llvm/lib/Target/AMDGPU/SILowerI1Copies.h      |   7 +-
 ...-copies-implicit-def-unstructured-loop.mir |  30 +--
 .../si-lower-i1-copies-phi-dependencies.mir   | 164 +++++++++++++++++
 4 files changed, 333 insertions(+), 41 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir

diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 5b329f46930ca..ef675eb958958 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -25,6 +25,8 @@
 #include "AMDGPU.h"
 #include "llvm/CodeGen/MachineSSAUpdater.h"
 #include "llvm/InitializePasses.h"
+#include <iterator>
+#include <optional>
 
 #define DEBUG_TYPE "si-i1-copies"
 
@@ -389,8 +391,7 @@ insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
 
 #ifndef NDEBUG
 static bool isVRegCompatibleReg(const SIRegisterInfo &TRI,
-                                const MachineRegisterInfo &MRI,
-                                Register Reg) {
+                                const MachineRegisterInfo &MRI, Register Reg) {
   unsigned Size = TRI.getRegSizeInBits(Reg, MRI);
   return Size == 1 || Size == 32;
 }
@@ -447,9 +448,10 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
 
   ST = &MF->getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
-  IsWave32 = ST->isWave32();
+  WavefrontSize = ST->getWavefrontSize();
+  assert((WavefrontSize == 32 || WavefrontSize == 64));
 
-  if (IsWave32) {
+  if (WavefrontSize == 32) {
     ExecReg = AMDGPU::EXEC_LO;
     MovOp = AMDGPU::S_MOV_B32;
     AndOp = AMDGPU::S_AND_B32;
@@ -457,6 +459,8 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
     XorOp = AMDGPU::S_XOR_B32;
     AndN2Op = AMDGPU::S_ANDN2_B32;
     OrN2Op = AMDGPU::S_ORN2_B32;
+    CSelectOp = AMDGPU::S_CSELECT_B32;
+    CmpLGOp = AMDGPU::S_CMP_LG_U32;
   } else {
     ExecReg = AMDGPU::EXEC;
     MovOp = AMDGPU::S_MOV_B64;
@@ -465,9 +469,148 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
     XorOp = AMDGPU::S_XOR_B64;
     AndN2Op = AMDGPU::S_ANDN2_B64;
     OrN2Op = AMDGPU::S_ORN2_B64;
+    CSelectOp = AMDGPU::S_CSELECT_B64;
+    CmpLGOp = AMDGPU::S_CMP_LG_U64;
   }
 }
 
+static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
+  Def = false;
+  Use = false;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
+      if (MO.isUse())
+        Use = true;
+      else
+        Def = true;
+    }
+  }
+}
+
+/// Move an instruction to a new position inside the same MBB, if none of its
+/// operand dependencies would be violated. Updates InstrToMovePos to point
+/// after the moved instruction. Returns true if moved, false otherwise.
+bool moveIfPossible(MachineBasicBlock &MBB,
+                    llvm::MachineBasicBlock::iterator &InstrToMovePos,
+                    const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
+  MachineInstr &MI = *InstrToMovePos;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    // Check if any operands are defined between current position and target
+    if (!MO.isReg())
+      continue;
+    if (MO.isUse()) {
+      for (auto I = std::next(MI.getIterator()); I != MoveAfterPos; ++I) {
+        for (const MachineOperand &MOI : I->operands())
+          if (MOI.isReg() && MOI.isDef() && MOI.getReg() == MO.getReg())
+            return false;
+      }
+    }
+
+    // Check if MI defines any register used before InsertPos
+    if (MO.isDef()) {
+      for (auto I = MoveAfterPos; I != MI.getIterator(); --I) {
+        for (const MachineOperand &MOI : I->operands())
+          if (MOI.isReg() && MOI.isUse() && MOI.getReg() == MO.getReg())
+            return false;
+      }
+    }
+  }
+
+  MI.removeFromParent();
+  MBB.insertAfter(MoveAfterPos, &MI);
+  InstrToMovePos = MoveAfterPos;
+  InstrToMovePos++;
+  return true;
+}
+
+/// Insert mask calculation procedure.
+/// Finds a place for insertion, reorganize instruction if needed,
+/// store/restore SCC register if needed.
+void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
+  MachineBasicBlock &MBB = *Incoming.Block;
+  auto FirstTerminator = MBB.getFirstTerminator();
+
+  bool TerminatorsUseSCC = false;
+  for (auto I = FirstTerminator, E = MBB.end(); I != E; ++I) {
+    bool DefsSCC;
+    instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
+    if (TerminatorsUseSCC || DefsSCC)
+      break;
+  }
+
+  if (!TerminatorsUseSCC) {
+    buildMergeLaneMasks(MBB, FirstTerminator, {}, Incoming.UpdatedReg, DstReg,
+                        Incoming.Reg);
+    return;
+  }
+
+  std::optional<llvm::MachineBasicBlock::iterator> sccDefPos, curRegDefPos;
+  for (auto I = FirstTerminator; I != MBB.begin(); --I) {
+    const llvm::iterator_range<llvm::MachineOperand *> IMO = I->operands();
+
+    for (const auto &MO : IMO) {
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+
+      Register R = MO.getReg();
+
+      if (R == Incoming.Reg) {
+        curRegDefPos = I;
+      }
+
+      if (R == AMDGPU::SCC) {
+        sccDefPos = I;
+        break;
+      }
+    }
+
+    if (sccDefPos)
+      break;
+  }
+
+  assert(sccDefPos);
+
+  if (!curRegDefPos) {
+    /// The SCC def comes after all of the operand defs
+    buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+                        Incoming.Reg);
+    return;
+  }
+
+  assert(curRegDefPos && std::distance(MBB.begin(), curRegDefPos.value()) >
+                             std::distance(MBB.begin(), sccDefPos.value()));
+
+  /// Try to move the SCC-defining instruction after the latest operand def
+  if (moveIfPossible(MBB, sccDefPos.value(), curRegDefPos.value())) {
+    buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+                        Incoming.Reg);
+    return;
+  }
+
+  /// if not possible: store/restore SCC register
+  curRegDefPos.value()++;
+
+  /// store SCC
+  Register SavedSCC = MRI->createVirtualRegister(
+      WavefrontSize == 32 ? &AMDGPU::SReg_32RegClass
+                          : &AMDGPU::SReg_64RegClass);
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CSelectOp), SavedSCC)
+      .addImm(1)
+      .addImm(0);
+
+  buildMergeLaneMasks(MBB, curRegDefPos.value(), {}, Incoming.UpdatedReg,
+                      DstReg, Incoming.Reg);
+
+  /// restore SCC
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CmpLGOp))
+      .addReg(SavedSCC)
+      .addImm(0)
+      .addReg(AMDGPU::SCC, RegState::ImplicitDefine);
+  return;
+}
+
 bool PhiLoweringHelper::lowerPhis() {
   MachineSSAUpdater SSAUpdater(*MF);
   LoopFinder LF(*DT, *PDT);
@@ -537,9 +680,7 @@ bool PhiLoweringHelper::lowerPhis() {
 
       for (auto &Incoming : Incomings) {
         MachineBasicBlock &IMBB = *Incoming.Block;
-        buildMergeLaneMasks(
-            IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
-            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
+        insertMask(Incoming, SSAUpdater.GetValueInMiddleOfBlock(&IMBB));
       }
     } else {
       // The phi is not observed from outside a loop. Use a more accurate
@@ -566,9 +707,7 @@ bool PhiLoweringHelper::lowerPhis() {
           continue;
 
         MachineBasicBlock &IMBB = *Incoming.Block;
-        buildMergeLaneMasks(
-            IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg,
-            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg);
+        insertMask(Incoming, SSAUpdater.GetValueInMiddleOfBlock(&IMBB));
       }
     }
 
@@ -696,20 +835,6 @@ bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
   return false;
 }
 
-static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
-  Def = false;
-  Use = false;
-
-  for (const MachineOperand &MO : MI.operands()) {
-    if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
-      if (MO.isUse())
-        Use = true;
-      else
-        Def = true;
-    }
-  }
-}
-
 /// Return a point at the end of the given \p MBB to insert SALU instructions
 /// for lane mask calculation. Take terminators and SCC into account.
 MachineBasicBlock::iterator
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index fd90328c2b926..bf96a07a611f4 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -20,7 +20,7 @@
 
 namespace llvm {
 
-/// Incoming for lane maks phi as machine instruction, incoming register \p Reg
+/// Incoming for lane mask phi as machine instruction, incoming register \p Reg
 /// and incoming block \p Block are taken from machine instruction.
 /// \p UpdatedReg (if valid) is \p Reg lane mask merged with another lane mask.
 struct Incoming {
@@ -42,7 +42,7 @@ class PhiLoweringHelper {
   virtual ~PhiLoweringHelper() = default;
 
 protected:
-  bool IsWave32 = false;
+  unsigned WavefrontSize;
   MachineFunction *MF = nullptr;
   MachineDominatorTree *DT = nullptr;
   MachinePostDominatorTree *PDT = nullptr;
@@ -62,12 +62,15 @@ class PhiLoweringHelper {
   unsigned XorOp;
   unsigned AndN2Op;
   unsigned OrN2Op;
+  unsigned CSelectOp;
+  unsigned CmpLGOp;
 
 public:
   bool lowerPhis();
   bool isConstantLaneMask(Register Reg, bool &Val) const;
   MachineBasicBlock::iterator
   getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+  void insertMask(const Incoming &Incoming, Register DstReg);
 
   void initializeLaneMaskRegisterAttributes(Register LaneMask) {
     LaneMaskRegAttrs = MRI->getVRegAttrs(LaneMask);
diff --git a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
index 028d511c6bf86..50219b38e636a 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
@@ -16,7 +16,7 @@ machineFunctionInfo:
 body:             |
   ; CHECK-LABEL: name: recursive_vreg_1_phi
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x{{[0-9a-fA-F]+}})
   ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
@@ -47,23 +47,23 @@ body:             |
   ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
   ; CHECK-NEXT:   [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-LABEL: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.3(0x{{[0-9a-fA-F]+}})
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
-  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
-  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
-  ; CHECK-NEXT:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; CHECK-NEXT:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
-  ; CHECK-NEXT:   S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
-  ; CHECK-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
-  ; CHECK-NEXT:   [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK-DAG:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
+  ; CHECK-DAG:   [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
+  ; CHECK-DAG:   [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
+  ; CHECK-DAG:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK-DAG:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+  ; CHECK-DAG:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
+  ; CHECK-DAG:   S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
+  ; CHECK-DAG:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+  ; CHECK-DAG:   [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.3, implicit $scc
   ; CHECK-NEXT:   S_BRANCH %bb.2
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-LABEL: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x{{[0-9a-fA-F]+}})
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
   ; CHECK-NEXT:   [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6
@@ -83,7 +83,7 @@ body:             |
   ; CHECK-NEXT:   [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   successors: %bb.4(0x00000000), %bb.1(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2
   ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
new file mode 100644
index 0000000000000..39fc7e34505d1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
@@ -0,0 +1,164 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-i1-copies -o - %s | FileCheck -check-prefixes=GCN %s
+
+---
+name:              phi_with_dependencies1
+# SCC def instruction (S_CMP_LG_U32) is below PHI dependency ($17)
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: phi_with_dependencies1
+  ; GCN-LABEL: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}
+  ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-LABEL: bb.1:
+  ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
+  ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
+  ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
+  ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-LABEL: bb.2:
+  ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
+  ; GCN-NEXT: S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x80000000)
+    %0:sreg_32 = S_MOV_B32 1
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2, %bb.1
+    %13:sreg_32 = S_MOV_B32 2
+    %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %17:sreg_64 = V_CMP_EQ_U32_e64 killed %16:vgpr_32, 1, implicit $exec
+    %1:vreg_1 = COPY %17:sreg_64
+    S_CMP_LG_U32 %0:sreg_32, killed %13:sreg_32, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+  ; predecessors: %bb.1
+    %2:vreg_1 = PHI %1:vreg_1, %bb.1
+    %19:sreg_64_xexec = COPY %2:vreg_1
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: phi_with_dependencies2
+---
+name:              phi_with_dependencies2
+# SCC def instruction (S_CMP_LG_U32) is above PHI dependancy ($17) definition and can be lowered
+tracksRegLiveness: true
+body:             |
+
+  ; GCN-LABEL: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}  
+  ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+
+  ; GCN-LABEL: bb.1:
+  ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
+  ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GCN-NEXT: [[V_MOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT: [[V_CMP_EQ:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV]], 1, implicit $exec
+  ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 %9, $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_OR:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
+  ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT: S_BRANCH %bb.2
+
+  ; GCN-LABEL: bb.2:
+  ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
+  ; GCN-NEXT: S_ENDPGM 0
+
+  bb.0:
+    successors: %bb.1(0x80000000); %bb.1(100.00%)
+    %0:sreg_32 = S_MOV_B32 1
+    S_BRANCH %bb.1
+
+  bb.1:
+  ; predecessors: %bb.0, %bb.1
+    successors: %bb.2, %bb.1
+
+    %13:sreg_32 = S_MOV_B32 2
+    S_CMP_LG_U32 %0:sreg_32, killed %13:sreg_32, implicit-def $scc
+    %16:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %17:sreg_64 = V_CMP_EQ_U32_e64 killed %16:vgpr_32, 1, implicit $exec
+    %1:vreg_1 = COPY %17:sreg_64
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+  ; predecessors: %bb.1
+    %2:vreg_1 = PHI %1:vreg_1, %bb.1
+    %19:sreg_64_xexec = COPY %2:vreg_1
+    S_ENDPGM 0
+...
+
+
+# GCN-LABEL: name: phi_with_dependencies3
+---
+name:              phi_with_dependencies3
+# SCC def instruction (V_MOV_B32_e32) is above PHI dependancy ($17) definition and cannot be lowered
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT: {{  $}}
+  ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-LABEL: bb.1:
+  ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+    ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
+  ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
+  ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
+  ; GCN-NEXT: [[HIDE_CSS:%[0-9]+]]:sreg_64 = S_CSELECT_B64 1, 0, implicit $scc
+  ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
+  ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
+  ; GCN-NEXT: S_CMP_LG_U64 [[HIDE_CSS]], 0, implicit-def $scc, implicit-def $scc
+  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-LABEL: bb.2:
+  ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
+  ; GCN-NEXT: S_ENDPGM 0
+
+  bb.0:
+    successors: %bb.1(0x80000000); %bb.1(100.00%)
+    %0:sreg_32 = S_MOV_B32 1
+    S_BRANCH %bb.1
+
+  bb.1:
+  ; predecessors: %bb.0, %bb.1
+    successors: %bb.2(0x40000000), %bb.1(0x40000000); %bb.2(50.00%), %bb.1(50.00%)
+
+    %1:sreg_32 = S_MOV_B32 2
+    S_CMP_LG_U32 %0:sreg_32, killed %1:sreg_32, implicit-def $scc
+    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec, implicit-def $scc
+    %4:sreg_64 = V_CMP_EQ_U32_e64 killed %2:vgpr_32, 1, implicit $exec
+    %5:vreg_1 = COPY %4:sreg_64
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_BRANCH %bb.2
+
+  bb.2:
+  ; predecessors: %bb.1
+    %6:vreg_1 = PHI %5:vreg_1, %bb.1
+    %7:sreg_64_xexec = COPY %6:vreg_1
+    S_ENDPGM 0
+
+...

>From 4d4d326aa3b859955042cd315a13e26d2a87f2ce Mon Sep 17 00:00:00 2001
From: idubinov <igor.dubinov at amd.com>
Date: Mon, 2 Feb 2026 10:13:24 -0600
Subject: [PATCH 2/6] Autogenerated tests

---
 ...-copies-implicit-def-unstructured-loop.mir |  30 ++--
 .../si-lower-i1-copies-phi-dependencies.mir   | 140 +++++++++---------
 2 files changed, 86 insertions(+), 84 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
index 50219b38e636a..028d511c6bf86 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-i1-copies-implicit-def-unstructured-loop.mir
@@ -16,7 +16,7 @@ machineFunctionInfo:
 body:             |
   ; CHECK-LABEL: name: recursive_vreg_1_phi
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x{{[0-9a-fA-F]+}})
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
@@ -47,23 +47,23 @@ body:             |
   ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
   ; CHECK-NEXT:   [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-LABEL: bb.1:
-  ; CHECK-NEXT:   successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.3(0x{{[0-9a-fA-F]+}})
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-DAG:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
-  ; CHECK-DAG:   [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
-  ; CHECK-DAG:   [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
-  ; CHECK-DAG:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; CHECK-DAG:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
-  ; CHECK-DAG:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
-  ; CHECK-DAG:   S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
-  ; CHECK-DAG:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
-  ; CHECK-DAG:   [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF6]], %bb.0, %31, %bb.3
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %54, %bb.3
+  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_3]], %bb.0, %29, %bb.3
+  ; CHECK-NEXT:   [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY [[S_ANDN2_B64_]]
+  ; CHECK-NEXT:   S_CMP_EQ_U32 [[PHI2]], killed [[S_MOV_B32_6]], implicit-def $scc
+  ; CHECK-NEXT:   [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+  ; CHECK-NEXT:   [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.3, implicit $scc
   ; CHECK-NEXT:   S_BRANCH %bb.2
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-LABEL: bb.2:
-  ; CHECK-NEXT:   successors: %bb.3(0x{{[0-9a-fA-F]+}})
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[V_MAD_I64_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
   ; CHECK-NEXT:   [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 6
@@ -83,7 +83,7 @@ body:             |
   ; CHECK-NEXT:   [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   successors: %bb.4(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+  ; CHECK-NEXT:   successors: %bb.4(0x00000000), %bb.1(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY6]], %bb.1, [[S_OR_B64_1]], %bb.2
   ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:sreg_64 = PHI [[PHI1]], %bb.1, [[DEF9]], %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
index 39fc7e34505d1..9320735d46592 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-phi-dependencies.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-i1-copies -o - %s | FileCheck -check-prefixes=GCN %s
 
 ---
@@ -6,28 +7,30 @@ name:              phi_with_dependencies1
 tracksRegLiveness: true
 body:             |
   ; GCN-LABEL: name: phi_with_dependencies1
-  ; GCN-LABEL: bb.0:
-  ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
   ; GCN-NEXT: {{  $}}
-  ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-  ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
-  ; GCN-NEXT: S_BRANCH %bb.1
-  ; GCN-LABEL: bb.1:
-  ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
-  ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
-  ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
-  ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
-  ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
-  ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
-  ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
-  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
-  ; GCN-NEXT: S_BRANCH %bb.2
-  ; GCN-LABEL: bb.2:
-  ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
-  ; GCN-NEXT: S_ENDPGM 0
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %5, %bb.1
+  ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_]], 1, implicit $exec
+  ; GCN-NEXT:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+  ; GCN-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], $exec, implicit-def $scc
+  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
+  ; GCN-NEXT:   S_CMP_LG_U32 [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]]
+  ; GCN-NEXT:   S_ENDPGM 0
   bb.0:
     successors: %bb.1(0x80000000)
     %0:sreg_32 = S_MOV_B32 1
@@ -56,32 +59,31 @@ name:              phi_with_dependencies2
 # SCC def instruction (S_CMP_LG_U32) is above PHI dependancy ($17) definition and can be lowered
 tracksRegLiveness: true
 body:             |
-
-  ; GCN-LABEL: bb.0:
-  ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
-  ; GCN-NEXT: {{  $}}  
-  ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-  ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
-  ; GCN-NEXT: S_BRANCH %bb.1
-
-  ; GCN-LABEL: bb.1:
-  ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-LABEL: name: phi_with_dependencies2
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
-  ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
-  ; GCN-NEXT: [[V_MOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GCN-NEXT: [[V_CMP_EQ:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV]], 1, implicit $exec
-  ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 %9, $exec, implicit-def $scc
-  ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ]], $exec, implicit-def $scc
-  ; GCN-NEXT: [[S_OR:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
-  ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
-  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
-  ; GCN-NEXT: S_BRANCH %bb.2
-
-  ; GCN-LABEL: bb.2:
-  ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
-  ; GCN-NEXT: S_ENDPGM 0
-
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %5, %bb.1
+  ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GCN-NEXT:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+  ; GCN-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 %3, $exec, implicit-def $scc
+  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
+  ; GCN-NEXT:   S_CMP_LG_U32 [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_]], 1, implicit $exec
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]]
+  ; GCN-NEXT:   S_ENDPGM 0
   bb.0:
     successors: %bb.1(0x80000000); %bb.1(100.00%)
     %0:sreg_32 = S_MOV_B32 1
@@ -113,31 +115,31 @@ name:              phi_with_dependencies3
 # SCC def instruction (V_MOV_B32_e32) is above PHI dependency ($17) definition and cannot be lowered
 tracksRegLiveness: true
 body:             |
-  ; GCN-LABEL: bb.0:
-  ; GCN-NEXT: successors: %bb.1(0x{{[0-9a-fA-F]+}})
+  ; GCN-LABEL: name: phi_with_dependencies3
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
   ; GCN-NEXT: {{  $}}
-  ; GCN-DAG: [[REG1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-  ; GCN-DAG: [[IMPLICIT_DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
-  ; GCN-NEXT: S_BRANCH %bb.1
-  ; GCN-LABEL: bb.1:
-  ; GCN-NEXT: successors: %bb.2(0x{{[0-9a-fA-F]+}}), %bb.1(0x{{[0-9a-fA-F]+}})
-    ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: [[NEW_PHI:%[0-9]+]]:sreg_64 = PHI [[IMPLICIT_DEF]], %bb.0, [[S_OR:%[0-9]+]], %bb.1
-  ; GCN-NEXT: [[S_MOV:%[0-9]+]]:sreg_32 = S_MOV_B32 2
-  ; GCN-NEXT: S_CMP_LG_U32 [[REG1]], killed [[S_MOV]], implicit-def $scc
-  ; GCN-NEXT: [[REG3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-  ; GCN-NEXT: [[V_CMP:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[REG3]], 1, implicit $exec
-  ; GCN-NEXT: [[HIDE_CSS:%[0-9]+]]:sreg_64 = S_CSELECT_B64 1, 0, implicit $scc
-  ; GCN-NEXT: [[S_ANDN:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[NEW_PHI]], $exec, implicit-def $scc
-  ; GCN-NEXT: [[S_AND:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP]], $exec, implicit-def $scc
-  ; GCN-NEXT: [[S_OR]]:sreg_64 = S_OR_B64 [[S_ANDN]], [[S_AND]], implicit-def $scc
-  ; GCN-NEXT: S_CMP_LG_U64 [[HIDE_CSS]], 0, implicit-def $scc, implicit-def $scc
-  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
-  ; GCN-NEXT: S_BRANCH %bb.2
-  ; GCN-LABEL: bb.2:
-  ; GCN-NEXT: {{%[0-9]+}}:sreg_64_xexec = COPY [[S_OR]]
-  ; GCN-NEXT: S_ENDPGM 0
-
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %5, %bb.1
+  ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+  ; GCN-NEXT:   S_CMP_LG_U32 [[S_MOV_B32_]], killed [[S_MOV_B32_1]], implicit-def $scc
+  ; GCN-NEXT:   [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+  ; GCN-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 %3, $exec, implicit-def $scc
+  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec, implicit-def $scc
+  ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_]], 1, implicit $exec
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]]
+  ; GCN-NEXT:   S_ENDPGM 0
   bb.0:
     successors: %bb.1(0x80000000); %bb.1(100.00%)
     %0:sreg_32 = S_MOV_B32 1

>From 35bb2b01e34f749e531182bf0ea39014b03b236e Mon Sep 17 00:00:00 2001
From: idubinov <53053614+idubinov at users.noreply.github.com>
Date: Mon, 2 Feb 2026 17:33:05 +0100
Subject: [PATCH 3/6] Apply suggestions from code review

Co-authored-by: Shilei Tian <i at tianshilei.me>
---
 llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index ef675eb958958..a23a3c895d6ed 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -491,7 +491,7 @@ static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
 /// Move instruction to a new position inside the same MBB, if there is no
 /// operand's dependencies. Change the InstrToMovePos after the moved
 /// instruction. returns true if instruction moved, false if not.
-bool moveIfPossible(MachineBasicBlock &MBB,
+static bool moveIfPossible(MachineBasicBlock &MBB,
                     llvm::MachineBasicBlock::iterator &InstrToMovePos,
                     const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
   MachineInstr &MI = *InstrToMovePos;
@@ -502,18 +502,20 @@ bool moveIfPossible(MachineBasicBlock &MBB,
       continue;
     if (MO.isUse()) {
       for (auto I = std::next(MI.getIterator()); I != MoveAfterPos; ++I) {
-        for (const MachineOperand &MOI : I->operands())
+        for (const MachineOperand &MOI : I->operands()) {
           if (MOI.isReg() && MOI.isDef() && MOI.getReg() == MO.getReg())
             return false;
+        }
       }
     }
 
     // Check if MI defines any register used before InsertPos
     if (MO.isDef()) {
       for (auto I = MoveAfterPos; I != MI.getIterator(); --I) {
-        for (const MachineOperand &MOI : I->operands())
+        for (const MachineOperand &MOI : I->operands()) {
           if (MOI.isReg() && MOI.isUse() && MOI.getReg() == MO.getReg())
             return false;
+        }
       }
     }
   }
@@ -556,9 +558,8 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
 
       Register R = MO.getReg();
 
-      if (R == Incoming.Reg) {
+      if (R == Incoming.Reg)
         curRegDefPos = I;
-      }
 
       if (R == AMDGPU::SCC) {
         sccDefPos = I;

>From 81dd10e03f0a3830f29eabfbf1a34bdaec81d508 Mon Sep 17 00:00:00 2001
From: idubinov <53053614+idubinov at users.noreply.github.com>
Date: Mon, 2 Feb 2026 17:33:42 +0100
Subject: [PATCH 4/6] Update llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Co-authored-by: Shilei Tian <i at tianshilei.me>
---
 llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index a23a3c895d6ed..fdc73505a31d7 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -609,7 +609,6 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
       .addReg(SavedSCC)
       .addImm(0)
       .addReg(AMDGPU::SCC, RegState::ImplicitDefine);
-  return;
 }
 
 bool PhiLoweringHelper::lowerPhis() {

>From c4f29b3a956f1b44666c4ebad12f293106a8a02c Mon Sep 17 00:00:00 2001
From: idubinov <igor.dubinov at amd.com>
Date: Tue, 3 Feb 2026 08:33:37 -0600
Subject: [PATCH 5/6] Use LaneMaskConstants

---
 .../AMDGPUGlobalISelDivergenceLowering.cpp    |  8 +--
 llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h  |  4 ++
 llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp    | 63 +++++++------------
 llvm/lib/Target/AMDGPU/SILowerI1Copies.h      | 14 +----
 4 files changed, 32 insertions(+), 57 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
index f924335844da2..d670a55dbeefc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -173,9 +173,9 @@ void DivergenceLoweringHelper::buildMergeLaneMasks(
   Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
 
   B.setInsertPt(MBB, I);
-  B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
-  B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
-  B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
+  B.buildInstr(LMC.AndN2Opc, {PrevMaskedReg}, {PrevRegCopy, LMC.ExecReg});
+  B.buildInstr(LMC.AndOpc, {CurMaskedReg}, {LMC.ExecReg, CurRegCopy});
+  B.buildInstr(LMC.OrOpc, {DstReg}, {PrevMaskedReg, CurMaskedReg});
 }
 
 // GlobalISel has to constrain S1 incoming taken as-is with lane mask register
@@ -218,7 +218,7 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
 
     Register VgprReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
     B.buildInstr(AMDGPU::COPY, {VgprReg}, {Reg})
-        .addUse(ExecReg, RegState::Implicit);
+        .addUse(LMC.ExecReg, RegState::Implicit);
 
     replaceUsesOfRegInInstWith(Reg, UseInst, VgprReg);
     TDCache[Reg] = VgprReg;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
index df80196d95176..95d88c7af368c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
@@ -31,10 +31,12 @@ class LaneMaskConstants {
   const unsigned AndSaveExecTermOpc;
   const unsigned BfmOpc;
   const unsigned CMovOpc;
+  const unsigned CmpLGOp;
   const unsigned CSelectOpc;
   const unsigned MovOpc;
   const unsigned MovTermOpc;
   const unsigned OrOpc;
+  const unsigned OrN2Op;
   const unsigned OrTermOpc;
   const unsigned OrSaveExecOpc;
   const unsigned XorOpc;
@@ -57,10 +59,12 @@ class LaneMaskConstants {
                                     : AMDGPU::S_AND_SAVEEXEC_B64_term),
         BfmOpc(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64),
         CMovOpc(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
+        CmpLGOp(IsWave32 ? AMDGPU::S_CMP_LG_U32 : AMDGPU::S_CMP_LG_U64),
         CSelectOpc(IsWave32 ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64),
         MovOpc(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
         MovTermOpc(IsWave32 ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term),
         OrOpc(IsWave32 ? AMDGPU::S_OR_B32 : AMDGPU::S_OR_B64),
+        OrN2Op(IsWave32 ? AMDGPU::S_ORN2_B32 : AMDGPU::S_ORN2_B64),
         OrTermOpc(IsWave32 ? AMDGPU::S_OR_B32_term : AMDGPU::S_OR_B64_term),
         OrSaveExecOpc(IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32
                                : AMDGPU::S_OR_SAVEEXEC_B64),
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index fdc73505a31d7..de17df71fc936 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -23,6 +23,7 @@
 
 #include "SILowerI1Copies.h"
 #include "AMDGPU.h"
+#include "AMDGPULaneMaskUtils.h"
 #include "llvm/CodeGen/MachineSSAUpdater.h"
 #include "llvm/InitializePasses.h"
 #include <iterator>
@@ -443,35 +444,12 @@ bool Vreg1LoweringHelper::lowerCopiesFromI1() {
 PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
                                      MachineDominatorTree *DT,
                                      MachinePostDominatorTree *PDT)
-    : MF(MF), DT(DT), PDT(PDT) {
+    : MF(MF), DT(DT), PDT(PDT),
+      LMC(AMDGPU::LaneMaskConstants::get(MF->getSubtarget<GCNSubtarget>())) {
   MRI = &MF->getRegInfo();
 
   ST = &MF->getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
-  WavefrontSize = ST->getWavefrontSize();
-  assert((WavefrontSize == 32 || WavefrontSize == 64));
-
-  if (WavefrontSize == 32) {
-    ExecReg = AMDGPU::EXEC_LO;
-    MovOp = AMDGPU::S_MOV_B32;
-    AndOp = AMDGPU::S_AND_B32;
-    OrOp = AMDGPU::S_OR_B32;
-    XorOp = AMDGPU::S_XOR_B32;
-    AndN2Op = AMDGPU::S_ANDN2_B32;
-    OrN2Op = AMDGPU::S_ORN2_B32;
-    CSelectOp = AMDGPU::S_CSELECT_B32;
-    CmpLGOp = AMDGPU::S_CMP_LG_U32;
-  } else {
-    ExecReg = AMDGPU::EXEC;
-    MovOp = AMDGPU::S_MOV_B64;
-    AndOp = AMDGPU::S_AND_B64;
-    OrOp = AMDGPU::S_OR_B64;
-    XorOp = AMDGPU::S_XOR_B64;
-    AndN2Op = AMDGPU::S_ANDN2_B64;
-    OrN2Op = AMDGPU::S_ORN2_B64;
-    CSelectOp = AMDGPU::S_CSELECT_B64;
-    CmpLGOp = AMDGPU::S_CMP_LG_U64;
-  }
 }
 
 static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
@@ -491,9 +469,10 @@ static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
 /// Move instruction to a new position inside the same MBB, if there is no
 /// operand's dependencies. Change the InstrToMovePos after the moved
 /// instruction. returns true if instruction moved, false if not.
-static bool moveIfPossible(MachineBasicBlock &MBB,
-                    llvm::MachineBasicBlock::iterator &InstrToMovePos,
-                    const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
+static bool
+moveIfPossible(MachineBasicBlock &MBB,
+               llvm::MachineBasicBlock::iterator &InstrToMovePos,
+               const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
   MachineInstr &MI = *InstrToMovePos;
 
   for (const MachineOperand &MO : MI.operands()) {
@@ -597,7 +576,7 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
   Register SavedSCC = MRI->createVirtualRegister(
       WavefrontSize == 32 ? &AMDGPU::SReg_32RegClass
                           : &AMDGPU::SReg_64RegClass);
-  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CSelectOp), SavedSCC)
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CSelectOpc), SavedSCC)
       .addImm(1)
       .addImm(0);
 
@@ -605,7 +584,7 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
                       DstReg, Incoming.Reg);
 
   /// restore SCC
-  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CmpLGOp))
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CmpLGOp))
       .addReg(SavedSCC)
       .addImm(0)
       .addReg(AMDGPU::SCC, RegState::ImplicitDefine);
@@ -816,7 +795,7 @@ bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
       return false;
   }
 
-  if (MI->getOpcode() != MovOp)
+  if (MI->getOpcode() != LMC.MovOpc)
     return false;
 
   if (!MI->getOperand(1).isImm())
@@ -920,10 +899,10 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
     if (PrevVal == CurVal) {
       BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
     } else if (CurVal) {
-      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(LMC.ExecReg);
     } else {
-      BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
-          .addReg(ExecReg)
+      BuildMI(MBB, I, DL, TII->get(LMC.XorOpc), DstReg)
+          .addReg(LMC.ExecReg)
           .addImm(-1);
     }
     return;
@@ -936,9 +915,9 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
       PrevMaskedReg = PrevReg;
     } else {
       PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-      BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
+      BuildMI(MBB, I, DL, TII->get(LMC.AndN2Opc), PrevMaskedReg)
           .addReg(PrevReg)
-          .addReg(ExecReg);
+          .addReg(LMC.ExecReg);
     }
   }
   if (!CurConstant) {
@@ -947,9 +926,9 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
       CurMaskedReg = CurReg;
     } else {
       CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-      BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
+      BuildMI(MBB, I, DL, TII->get(LMC.AndOpc), CurMaskedReg)
           .addReg(CurReg)
-          .addReg(ExecReg);
+          .addReg(LMC.ExecReg);
     }
   }
 
@@ -960,13 +939,13 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
         .addReg(PrevMaskedReg);
   } else if (PrevConstant && PrevVal) {
-    BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
+    BuildMI(MBB, I, DL, TII->get(LMC.OrN2Op), DstReg)
         .addReg(CurMaskedReg)
-        .addReg(ExecReg);
+        .addReg(LMC.ExecReg);
   } else {
-    BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
+    BuildMI(MBB, I, DL, TII->get(LMC.OrOpc), DstReg)
         .addReg(PrevMaskedReg)
-        .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
+        .addReg(CurMaskedReg ? CurMaskedReg : LMC.ExecReg);
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index bf96a07a611f4..ee8d3c713143d 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AMDGPULaneMaskUtils.h"
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
@@ -42,7 +43,7 @@ class PhiLoweringHelper {
   virtual ~PhiLoweringHelper() = default;
 
 protected:
-  unsigned WavefrontSize;
+  unsigned WavefrontSize; /// grem
   MachineFunction *MF = nullptr;
   MachineDominatorTree *DT = nullptr;
   MachinePostDominatorTree *PDT = nullptr;
@@ -50,21 +51,12 @@ class PhiLoweringHelper {
   const GCNSubtarget *ST = nullptr;
   const SIInstrInfo *TII = nullptr;
   MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs;
+  const AMDGPU::LaneMaskConstants &LMC;
 
 #ifndef NDEBUG
   DenseSet<Register> PhiRegisters;
 #endif
 
-  Register ExecReg;
-  unsigned MovOp;
-  unsigned AndOp;
-  unsigned OrOp;
-  unsigned XorOp;
-  unsigned AndN2Op;
-  unsigned OrN2Op;
-  unsigned CSelectOp;
-  unsigned CmpLGOp;
-
 public:
   bool lowerPhis();
   bool isConstantLaneMask(Register Reg, bool &Val) const;

>From 1da5e7811c06eb58fe35db9e30a369d4229e4c9a Mon Sep 17 00:00:00 2001
From: idubinov <igor.dubinov at amd.com>
Date: Tue, 3 Feb 2026 10:15:27 -0600
Subject: [PATCH 6/6] Revert "Use LaneMaskConstants"

This reverts commit c4f29b3a956f1b44666c4ebad12f293106a8a02c.
---
 .../AMDGPUGlobalISelDivergenceLowering.cpp    |  8 +--
 llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h  |  4 --
 llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp    | 63 ++++++++++++-------
 llvm/lib/Target/AMDGPU/SILowerI1Copies.h      | 14 ++++-
 4 files changed, 57 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
index d670a55dbeefc..f924335844da2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
@@ -173,9 +173,9 @@ void DivergenceLoweringHelper::buildMergeLaneMasks(
   Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
 
   B.setInsertPt(MBB, I);
-  B.buildInstr(LMC.AndN2Opc, {PrevMaskedReg}, {PrevRegCopy, LMC.ExecReg});
-  B.buildInstr(LMC.AndOpc, {CurMaskedReg}, {LMC.ExecReg, CurRegCopy});
-  B.buildInstr(LMC.OrOpc, {DstReg}, {PrevMaskedReg, CurMaskedReg});
+  B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
+  B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
+  B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
 }
 
 // GlobalISel has to constrain S1 incoming taken as-is with lane mask register
@@ -218,7 +218,7 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
 
     Register VgprReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
     B.buildInstr(AMDGPU::COPY, {VgprReg}, {Reg})
-        .addUse(LMC.ExecReg, RegState::Implicit);
+        .addUse(ExecReg, RegState::Implicit);
 
     replaceUsesOfRegInInstWith(Reg, UseInst, VgprReg);
     TDCache[Reg] = VgprReg;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
index 95d88c7af368c..df80196d95176 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
@@ -31,12 +31,10 @@ class LaneMaskConstants {
   const unsigned AndSaveExecTermOpc;
   const unsigned BfmOpc;
   const unsigned CMovOpc;
-  const unsigned CmpLGOp;
   const unsigned CSelectOpc;
   const unsigned MovOpc;
   const unsigned MovTermOpc;
   const unsigned OrOpc;
-  const unsigned OrN2Op;
   const unsigned OrTermOpc;
   const unsigned OrSaveExecOpc;
   const unsigned XorOpc;
@@ -59,12 +57,10 @@ class LaneMaskConstants {
                                     : AMDGPU::S_AND_SAVEEXEC_B64_term),
         BfmOpc(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64),
         CMovOpc(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
-        CmpLGOp(IsWave32 ? AMDGPU::S_CMP_LG_U32 : AMDGPU::S_CMP_LG_U64),
         CSelectOpc(IsWave32 ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64),
         MovOpc(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
         MovTermOpc(IsWave32 ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term),
         OrOpc(IsWave32 ? AMDGPU::S_OR_B32 : AMDGPU::S_OR_B64),
-        OrN2Op(IsWave32 ? AMDGPU::S_ORN2_B32 : AMDGPU::S_ORN2_B64),
         OrTermOpc(IsWave32 ? AMDGPU::S_OR_B32_term : AMDGPU::S_OR_B64_term),
         OrSaveExecOpc(IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32
                                : AMDGPU::S_OR_SAVEEXEC_B64),
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index de17df71fc936..fdc73505a31d7 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -23,7 +23,6 @@
 
 #include "SILowerI1Copies.h"
 #include "AMDGPU.h"
-#include "AMDGPULaneMaskUtils.h"
 #include "llvm/CodeGen/MachineSSAUpdater.h"
 #include "llvm/InitializePasses.h"
 #include <iterator>
@@ -444,12 +443,35 @@ bool Vreg1LoweringHelper::lowerCopiesFromI1() {
 PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
                                      MachineDominatorTree *DT,
                                      MachinePostDominatorTree *PDT)
-    : MF(MF), DT(DT), PDT(PDT),
-      LMC(AMDGPU::LaneMaskConstants::get(MF->getSubtarget<GCNSubtarget>())) {
+    : MF(MF), DT(DT), PDT(PDT) {
   MRI = &MF->getRegInfo();
 
   ST = &MF->getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
+  WavefrontSize = ST->getWavefrontSize();
+  assert((WavefrontSize == 32 || WavefrontSize == 64));
+
+  if (WavefrontSize == 32) {
+    ExecReg = AMDGPU::EXEC_LO;
+    MovOp = AMDGPU::S_MOV_B32;
+    AndOp = AMDGPU::S_AND_B32;
+    OrOp = AMDGPU::S_OR_B32;
+    XorOp = AMDGPU::S_XOR_B32;
+    AndN2Op = AMDGPU::S_ANDN2_B32;
+    OrN2Op = AMDGPU::S_ORN2_B32;
+    CSelectOp = AMDGPU::S_CSELECT_B32;
+    CmpLGOp = AMDGPU::S_CMP_LG_U32;
+  } else {
+    ExecReg = AMDGPU::EXEC;
+    MovOp = AMDGPU::S_MOV_B64;
+    AndOp = AMDGPU::S_AND_B64;
+    OrOp = AMDGPU::S_OR_B64;
+    XorOp = AMDGPU::S_XOR_B64;
+    AndN2Op = AMDGPU::S_ANDN2_B64;
+    OrN2Op = AMDGPU::S_ORN2_B64;
+    CSelectOp = AMDGPU::S_CSELECT_B64;
+    CmpLGOp = AMDGPU::S_CMP_LG_U64;
+  }
 }
 
 static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
@@ -469,10 +491,9 @@ static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
 /// Move instruction to a new position inside the same MBB, if there is no
 /// operand's dependencies. Change the InstrToMovePos after the moved
 /// instruction. returns true if instruction moved, false if not.
-static bool
-moveIfPossible(MachineBasicBlock &MBB,
-               llvm::MachineBasicBlock::iterator &InstrToMovePos,
-               const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
+static bool moveIfPossible(MachineBasicBlock &MBB,
+                    llvm::MachineBasicBlock::iterator &InstrToMovePos,
+                    const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
   MachineInstr &MI = *InstrToMovePos;
 
   for (const MachineOperand &MO : MI.operands()) {
@@ -576,7 +597,7 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
   Register SavedSCC = MRI->createVirtualRegister(
       WavefrontSize == 32 ? &AMDGPU::SReg_32RegClass
                           : &AMDGPU::SReg_64RegClass);
-  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CSelectOpc), SavedSCC)
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CSelectOp), SavedSCC)
       .addImm(1)
       .addImm(0);
 
@@ -584,7 +605,7 @@ void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
                       DstReg, Incoming.Reg);
 
   /// restore SCC
-  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CmpLGOp))
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(CmpLGOp))
       .addReg(SavedSCC)
       .addImm(0)
       .addReg(AMDGPU::SCC, RegState::ImplicitDefine);
@@ -795,7 +816,7 @@ bool PhiLoweringHelper::isConstantLaneMask(Register Reg, bool &Val) const {
       return false;
   }
 
-  if (MI->getOpcode() != LMC.MovOpc)
+  if (MI->getOpcode() != MovOp)
     return false;
 
   if (!MI->getOperand(1).isImm())
@@ -899,10 +920,10 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
     if (PrevVal == CurVal) {
       BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
     } else if (CurVal) {
-      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(LMC.ExecReg);
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
     } else {
-      BuildMI(MBB, I, DL, TII->get(LMC.XorOpc), DstReg)
-          .addReg(LMC.ExecReg)
+      BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
+          .addReg(ExecReg)
           .addImm(-1);
     }
     return;
@@ -915,9 +936,9 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
       PrevMaskedReg = PrevReg;
     } else {
       PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-      BuildMI(MBB, I, DL, TII->get(LMC.AndN2Opc), PrevMaskedReg)
+      BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
           .addReg(PrevReg)
-          .addReg(LMC.ExecReg);
+          .addReg(ExecReg);
     }
   }
   if (!CurConstant) {
@@ -926,9 +947,9 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
       CurMaskedReg = CurReg;
     } else {
       CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-      BuildMI(MBB, I, DL, TII->get(LMC.AndOpc), CurMaskedReg)
+      BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
           .addReg(CurReg)
-          .addReg(LMC.ExecReg);
+          .addReg(ExecReg);
     }
   }
 
@@ -939,13 +960,13 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
         .addReg(PrevMaskedReg);
   } else if (PrevConstant && PrevVal) {
-    BuildMI(MBB, I, DL, TII->get(LMC.OrN2Op), DstReg)
+    BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
         .addReg(CurMaskedReg)
-        .addReg(LMC.ExecReg);
+        .addReg(ExecReg);
   } else {
-    BuildMI(MBB, I, DL, TII->get(LMC.OrOpc), DstReg)
+    BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
         .addReg(PrevMaskedReg)
-        .addReg(CurMaskedReg ? CurMaskedReg : LMC.ExecReg);
+        .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
index ee8d3c713143d..bf96a07a611f4 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPULaneMaskUtils.h"
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
@@ -43,7 +42,7 @@ class PhiLoweringHelper {
   virtual ~PhiLoweringHelper() = default;
 
 protected:
-  unsigned WavefrontSize; /// grem
+  unsigned WavefrontSize;
   MachineFunction *MF = nullptr;
   MachineDominatorTree *DT = nullptr;
   MachinePostDominatorTree *PDT = nullptr;
@@ -51,12 +50,21 @@ class PhiLoweringHelper {
   const GCNSubtarget *ST = nullptr;
   const SIInstrInfo *TII = nullptr;
   MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs;
-  const AMDGPU::LaneMaskConstants &LMC;
 
 #ifndef NDEBUG
   DenseSet<Register> PhiRegisters;
 #endif
 
+  Register ExecReg;
+  unsigned MovOp;
+  unsigned AndOp;
+  unsigned OrOp;
+  unsigned XorOp;
+  unsigned AndN2Op;
+  unsigned OrN2Op;
+  unsigned CSelectOp;
+  unsigned CmpLGOp;
+
 public:
   bool lowerPhis();
   bool isConstantLaneMask(Register Reg, bool &Val) const;



More information about the llvm-commits mailing list