[llvm] [AMDGPU] Fix phi injection in si-i1-lowering (PR #179267)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 4 03:24:19 PST 2026
================
@@ -442,30 +444,150 @@ bool Vreg1LoweringHelper::lowerCopiesFromI1() {
PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF,
MachineDominatorTree *DT,
MachinePostDominatorTree *PDT)
- : MF(MF), DT(DT), PDT(PDT) {
+ : MF(MF), DT(DT), PDT(PDT),
+ LMC(AMDGPU::LaneMaskConstants::get(MF->getSubtarget<GCNSubtarget>())) {
MRI = &MF->getRegInfo();
ST = &MF->getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
- IsWave32 = ST->isWave32();
-
- if (IsWave32) {
- ExecReg = AMDGPU::EXEC_LO;
- MovOp = AMDGPU::S_MOV_B32;
- AndOp = AMDGPU::S_AND_B32;
- OrOp = AMDGPU::S_OR_B32;
- XorOp = AMDGPU::S_XOR_B32;
- AndN2Op = AMDGPU::S_ANDN2_B32;
- OrN2Op = AMDGPU::S_ORN2_B32;
- } else {
- ExecReg = AMDGPU::EXEC;
- MovOp = AMDGPU::S_MOV_B64;
- AndOp = AMDGPU::S_AND_B64;
- OrOp = AMDGPU::S_OR_B64;
- XorOp = AMDGPU::S_XOR_B64;
- AndN2Op = AMDGPU::S_ANDN2_B64;
- OrN2Op = AMDGPU::S_ORN2_B64;
+}
+
+static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use) { // Reports via Def/Use whether MI has an SCC def operand / an SCC use operand.
+ Def = false; // Both out-parameters are reset first, so earlier values never leak through.
+ Use = false;
+
+ for (const MachineOperand &MO : MI.operands()) { // Every operand returned by operands() is inspected.
+ if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
+ if (MO.isUse())
+ Use = true;
+ else // A register operand that is not a use is counted as a def.
+ Def = true;
+ }
+ }
+}
+
+/// Try to move the instruction at InstrToMovePos to just after MoveAfterPos in
+/// MBB; refuse if one of the register checks below fails. On success InstrToMovePos
+/// is set to the slot after MoveAfterPos (the moved instruction). Returns true if the instruction was moved, false otherwise.
+static bool
+moveIfPossible(MachineBasicBlock &MBB,
+ llvm::MachineBasicBlock::iterator &InstrToMovePos,
+ const llvm::MachineBasicBlock::iterator &MoveAfterPos) {
+ MachineInstr &MI = *InstrToMovePos;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ // Only register operands are compared; nothing else (e.g. memory or side-effect ordering) is examined here.
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse()) { // A register MI reads must not be redefined by an instruction it would be moved across.
+ for (auto I = std::next(MI.getIterator()); I != MoveAfterPos; ++I) { // NOTE(review): the scan stops before MoveAfterPos, so a def by the instruction at MoveAfterPos itself is not checked - confirm this is intended.
+ for (const MachineOperand &MOI : I->operands()) {
+ if (MOI.isReg() && MOI.isDef() && MOI.getReg() == MO.getReg())
+ return false;
+ }
+ }
+ }
+
+ // A register MI defines must not be read by any instruction from MoveAfterPos back to (excluding) MI.
+ if (MO.isDef()) {
+ for (auto I = MoveAfterPos; I != MI.getIterator(); --I) { // Walks backwards; presumably relies on MoveAfterPos being after MI in MBB - verify against callers.
+ for (const MachineOperand &MOI : I->operands()) {
+ if (MOI.isReg() && MOI.isUse() && MOI.getReg() == MO.getReg())
+ return false;
+ }
+ }
+ }
}
+
+ MI.removeFromParent(); // No conflict found: unlink MI and re-insert it right after MoveAfterPos.
+ MBB.insertAfter(MoveAfterPos, &MI);
+ InstrToMovePos = MoveAfterPos; // Re-point the caller's iterator at the slot following MoveAfterPos, where MI was inserted.
+ InstrToMovePos++;
+ return true;
+}
+
+/// Insert mask calculation procedure.
+/// Finds a place for insertion, reorganize instruction if needed,
+/// store/restore SCC register if needed.
+void PhiLoweringHelper::insertMask(const Incoming &Incoming, Register DstReg) {
+ MachineBasicBlock &MBB = *Incoming.Block;
+ auto FirstTerminator = MBB.getFirstTerminator();
+
+ bool TerminatorsUseSCC = false;
+ for (auto I = FirstTerminator, E = MBB.end(); I != E; ++I) {
+ bool DefsSCC;
+ instrDefsUsesSCC(*I, DefsSCC, TerminatorsUseSCC);
+ if (TerminatorsUseSCC || DefsSCC)
+ break;
+ }
+
+ if (!TerminatorsUseSCC) {
+ buildMergeLaneMasks(MBB, FirstTerminator, {}, Incoming.UpdatedReg, DstReg,
+ Incoming.Reg);
+ return;
+ }
+
+ std::optional<llvm::MachineBasicBlock::iterator> sccDefPos, curRegDefPos;
+ for (auto I = FirstTerminator; I != MBB.begin(); --I) {
+ const llvm::iterator_range<llvm::MachineOperand *> IMO = I->operands();
+
+ for (const auto &MO : IMO) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ Register R = MO.getReg();
+
+ if (R == Incoming.Reg)
+ curRegDefPos = I;
+
+ if (R == AMDGPU::SCC) {
+ sccDefPos = I;
+ break;
+ }
+ }
+
+ if (sccDefPos)
+ break;
+ }
+
+ assert(sccDefPos);
+
+ if (!curRegDefPos) {
+ /// SCC define is after any of operator defines
+ buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+ Incoming.Reg);
+ return;
+ }
+
+ assert(curRegDefPos && std::distance(MBB.begin(), curRegDefPos.value()) >
+ std::distance(MBB.begin(), sccDefPos.value()));
+
+ /// Try to move the SCC def operator after the latest operator
+ if (moveIfPossible(MBB, sccDefPos.value(), curRegDefPos.value())) {
+ buildMergeLaneMasks(MBB, sccDefPos.value(), {}, Incoming.UpdatedReg, DstReg,
+ Incoming.Reg);
+ return;
+ }
+
+ /// if not possible: store/restore SCC register
+ curRegDefPos.value()++;
+
+ /// store SCC
+ Register SavedSCC = MRI->createVirtualRegister(
+ ST->getWavefrontSize() == 32 ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::SReg_64RegClass);
+ BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CSelectOpc), SavedSCC)
+ .addImm(1)
+ .addImm(0);
+
+ buildMergeLaneMasks(MBB, curRegDefPos.value(), {}, Incoming.UpdatedReg,
+ DstReg, Incoming.Reg);
+
+ /// restore SCC
+ BuildMI(MBB, curRegDefPos.value(), {}, TII->get(LMC.CmpLGOp))
----------------
idubinov wrote:
Thank you for your comment.
According to the [ROCm compatibility matrix](https://rocm.docs.amd.com/en/docs-7.1.1/compatibility/compatibility-matrix.html), GFX6/7 is not supported as an LLVM target in the current ROCm compiler. Please correct me if I am wrong.
https://github.com/llvm/llvm-project/pull/179267
More information about the llvm-commits
mailing list