[llvm] [AMDGPU] Fix phi injection in si-i1-lowering (PR #179267)

Wed Feb 4 03:24:19 PST 2026

================
@@ -442,30 +444,150 @@ bool Vreg1LoweringHelper::lowerCopiesFromI1() {
 PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
                                      MachineDominatorTree *DT,
                                      MachinePostDominatorTree *PDT)
-    : MF(MF), DT(DT), PDT(PDT) {
+    : MF(MF), DT(DT), PDT(PDT),
+      LMC(AMDGPU::LaneMaskConstants::get(MF->getSubtarget<GCNSubtarget>())) {
   MRI = &MF->getRegInfo();
 
   ST = &MF->getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
-  IsWave32 = ST->isWave32();
-
-  if (IsWave32) {
-    ExecReg = AMDGPU::EXEC_LO;
-    MovOp = AMDGPU::S_MOV_B32;
-    AndOp = AMDGPU::S_AND_B32;
-    OrOp = AMDGPU::S_OR_B32;
-    XorOp = AMDGPU::S_XOR_B32;
-    AndN2Op = AMDGPU::S_ANDN2_B32;
-    OrN2Op = AMDGPU::S_ORN2_B32;
-  } else {
-    ExecReg = AMDGPU::EXEC;
-    MovOp = AMDGPU::S_MOV_B64;
-    AndOp = AMDGPU::S_AND_B64;
-    OrOp = AMDGPU::S_OR_B64;
-    XorOp = AMDGPU::S_XOR_B64;
-    AndN2Op = AMDGPU::S_ANDN2_B64;
-    OrN2Op = AMDGPU::S_ORN2_B64;
+}
+
+/// Report how \p MI touches the SCC register. Both out-flags are cleared
+/// first; \p Use is then set if any register operand equal to SCC is a use,
+/// and \p Def is set if any such operand is not a use.
+static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) {
+  Def = Use = false;
+
+  for (const MachineOperand &Op : MI.operands()) {
+    // Only register operands naming SCC are of interest.
+    if (!Op.isReg() || Op.getReg() != AMDGPU::SCC)
+      continue;
+
+    bool &Flag = Op.isUse() ? Use : Def;
+    Flag = true;
+  }
+}
+
+/// Try to relocate the instruction at \p InstrToMovePos so that it sits
+/// immediately after \p MoveAfterPos inside \p MBB.
+///
+/// \p InstrToMovePos must precede \p MoveAfterPos in \p MBB. The move is
+/// rejected when any instruction that would be crossed, i.e. every instruction
+/// in the range (InstrToMovePos, MoveAfterPos], shares a register operand with
+/// the moved instruction and at least one side of the pair is a def:
+///  - the crossed instruction defines a register the moved one reads,
+///  - the crossed instruction reads a register the moved one defines,
+///  - both define the same register (the final value would change).
+/// Registers are compared for equality only; sub/super-register overlap is not
+/// considered.
+///
+/// On success \p InstrToMovePos is updated to point at the moved instruction
+/// in its new position. Returns true if the instruction was moved, false
+/// otherwise (in which case nothing is modified).
+static bool
+moveIfPossible(MachineBasicBlock &MBB,
+               llvm::MachineBasicBlock::iterator &InstrToMovePos,
+               const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
+  // Moving an instruction after itself is meaningless and would leave
+  // MoveAfterPos dangling once the instruction is unlinked.
+  if (InstrToMovePos == MoveAfterPos)
+    return false;
+
+  MachineInstr &MI = *InstrToMovePos;
+
+  // MoveAfterPos itself is crossed as well, so the range is closed on the
+  // right.
+  const auto CrossedBegin = std::next(InstrToMovePos);
+  const auto CrossedEnd = std::next(MoveAfterPos);
+
+  for (const MachineOperand &MO : MI.operands()) {
+    if (!MO.isReg())
+      continue;
+
+    for (auto I = CrossedBegin; I != CrossedEnd; ++I) {
+      for (const MachineOperand &MOI : I->operands()) {
+        if (!MOI.isReg() || MOI.getReg() != MO.getReg())
+          continue;
+        // Two reads of the same register can be reordered freely; any pair
+        // involving a write cannot.
+        if (MO.isDef() || MOI.isDef())
+          return false;
+      }
+    }
   }
+
+  MI.removeFromParent();
+  MBB.insertAfter(MoveAfterPos, &MI);
+  InstrToMovePos = std::next(MoveAfterPos);
+  return true;
+}
+
+/// Insert mask calculation procedure.
+/// Finds a place for insertion, reorganize instruction if needed,
+/// store/restore SCC register if needed.
+void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
+  MachineBasicBlock &MBB = *Incoming.Block;
+  auto FirstTerminator = MBB.getFirstTerminator();
+
+  bool TerminatorsUseSCC = false;
+  for (auto I = FirstTerminator, E = MBB.end(); I != E; ++I) {
+    bool DefsSCC;
+    instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
+    if (TerminatorsUseSCC || DefsSCC)
+      break;
+  }
+
+  if (!TerminatorsUseSCC) {
+    buildMergeLaneMasks(MBB, FirstTerminator, {}, Incoming.UpdatedReg, DstReg,
+                        Incoming.Reg);
+    return;
+  }
+
+  std::optional<llvm::MachineBasicBlock::iterator> sccDefPos, curRegDefPos;
+  for (auto I = FirstTerminator; I != MBB.begin(); --I) {
+    const llvm::iterator_range<llvm::MachineOperand *> IMO = I->operands();
+
+    for (const auto &MO : IMO) {
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+
+      Register R = MO.getReg();
+
+      if (R == Incoming.Reg)
+        curRegDefPos = I;
+
+      if (R == AMDGPU::SCC) {
+        sccDefPos = I;
+        break;
+      }
+    }
+
+    if (sccDefPos)
+      break;
+  }
+
+  assert(sccDefPos);
+
+  if (!curRegDefPos) {
+    /// SCC define is after any of operator defines
+    buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+                        Incoming.Reg);
+    return;
+  }
+
+  assert(curRegDefPos && std::distance(MBB.begin(), curRegDefPos.value()) >
+                             std::distance(MBB.begin(), sccDefPos.value()));
+
+  /// Try to move the SCC def operator after the latest operator
+  if (moveIfPossible(MBB, sccDefPos.value(), curRegDefPos.value())) {
+    buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+                        Incoming.Reg);
+    return;
+  }
+
+  /// if not possible: store/restore SCC register
+  curRegDefPos.value()++;
+
+  /// store SCC
+  Register SavedSCC = MRI->createVirtualRegister(
+      ST->getWavefrontSize() == 32 ? &AMDGPU::SReg_32RegClass
+                          : &AMDGPU::SReg_64RegClass);
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CSelectOpc), SavedSCC)
+      .addImm(1)
+      .addImm(0);
+
+  buildMergeLaneMasks(MBB, curRegDefPos.value(), {}, Incoming.UpdatedReg,
+                      DstReg, Incoming.Reg);
+
+  /// restore SCC
+  BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CmpLGOp))
----------------
idubinov wrote:

Thank you for your comment. 
According to [this compatibility matrix](https://rocm.docs.amd.com/en/docs-7.1.1/compatibility/compatibility-matrix.html), GFX6/7 is not supported as an LLVM target in the current ROCm compiler. Please correct me if I am wrong.

https://github.com/llvm/llvm-project/pull/179267