[llvm] [AMDGPU] Switch V_CNDMASK operands to shrink it into VOP2 (PR #135162)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 11 10:03:59 PDT 2025
================
@@ -851,92 +851,137 @@ unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
return AMDGPU::V_CMP_LE_U32_e64;
case AMDGPU::V_CMP_LT_U32_e64:
return AMDGPU::V_CMP_GE_U32_e64;
- // float 32
+ // unsigned 64
+ case AMDGPU::V_CMP_EQ_U64_e64:
+ return AMDGPU::V_CMP_NE_U64_e64;
+ case AMDGPU::V_CMP_NE_U64_e64:
+ return AMDGPU::V_CMP_EQ_U64_e64;
+ case AMDGPU::V_CMP_GE_U64_e64:
+ return AMDGPU::V_CMP_LT_U64_e64;
+ case AMDGPU::V_CMP_LE_U64_e64:
+ return AMDGPU::V_CMP_GT_U64_e64;
+ case AMDGPU::V_CMP_GT_U64_e64:
+ return AMDGPU::V_CMP_LE_U64_e64;
+ case AMDGPU::V_CMP_LT_U64_e64:
+ return AMDGPU::V_CMP_GE_U64_e64;
+ // float 32
case AMDGPU::V_CMP_EQ_F32_e64:
return AMDGPU::V_CMP_NEQ_F32_e64;
case AMDGPU::V_CMP_NEQ_F32_e64:
return AMDGPU::V_CMP_EQ_F32_e64;
case AMDGPU::V_CMP_GE_F32_e64:
- return AMDGPU::V_CMP_LT_F32_e64;
+ return AMDGPU::V_CMP_NGE_F32_e64;
case AMDGPU::V_CMP_LE_F32_e64:
- return AMDGPU::V_CMP_GT_F32_e64;
+ return AMDGPU::V_CMP_NLE_F32_e64;
case AMDGPU::V_CMP_GT_F32_e64:
- return AMDGPU::V_CMP_LE_F32_e64;
+ return AMDGPU::V_CMP_NGT_F32_e64;
case AMDGPU::V_CMP_LT_F32_e64:
- return AMDGPU::V_CMP_GE_F32_e64;
+ return AMDGPU::V_CMP_NLT_F32_e64;
+ // float 64
+ case AMDGPU::V_CMP_EQ_F64_e64:
+ return AMDGPU::V_CMP_NEQ_F64_e64;
+ case AMDGPU::V_CMP_NEQ_F64_e64:
+ return AMDGPU::V_CMP_EQ_F64_e64;
+ case AMDGPU::V_CMP_GE_F64_e64:
+ return AMDGPU::V_CMP_NGE_F64_e64;
+ case AMDGPU::V_CMP_LE_F64_e64:
+ return AMDGPU::V_CMP_NLE_F64_e64;
+ case AMDGPU::V_CMP_GT_F64_e64:
+ return AMDGPU::V_CMP_NGT_F64_e64;
+ case AMDGPU::V_CMP_LT_F64_e64:
+ return AMDGPU::V_CMP_NLT_F64_e64;
default:
return 0;
}
}
-bool SIShrinkInstructions::shouldSwitchOperands(MachineRegisterInfo &MRI,
- MachineInstr &MI,
- const SIInstrInfo &TII) const {
- auto allUses = MRI.use_nodbg_operands(MI.getOperand(5).getReg());
- unsigned Count = 0;
+bool SIShrinkInstructions::shouldSwapCndOperands(
+ MachineInstr &MI, const SIInstrInfo &TII,
+ SmallVector<MachineOperand *, 4> &UsesToProcess) const {
+ auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
+ bool ShouldSwap = false;
- for (auto &Use : allUses) {
- if (Use.getParent()->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+ for (auto &Use : AllUses) {
+ MachineInstr *UseInst = Use.getParent();
+ if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
return false;
- MachineOperand *Src0 =
- TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src0);
- MachineOperand *Src1 =
- TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src1);
+ MachineOperand *Src0 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src0);
+ MachineOperand *Src1 = TII.getNamedOperand(*UseInst, AMDGPU::OpName::src1);
auto Src0Imm = Src0->isImm();
auto Src1Imm = Src1->isImm();
if (!Src1Imm && Src0Imm)
return false;
- if (Src1Imm && !Src0Imm)
- Count++;
+
+ UsesToProcess.push_back(&Use);
+
+ if (Src1Imm && !Src0Imm && !UseInst->getOperand(1).getImm())
+ ShouldSwap = true;
}
- return (Count >= 1);
+ return ShouldSwap;
}
-// OldVCC and NewVCC are used to remember VCC after inverting comparison
-bool SIShrinkInstructions::trySwitchOperands(MachineInstr &MI, Register *OldVCC,
- Register *NewVCC) const {
- const DebugLoc &DL = MI.getDebugLoc();
- auto Reg = MI.getOperand(5).getReg();
- if (!Reg.isVirtual())
- return false;
+void swapCndOperands(MachineInstr &MI) {
+ MachineOperand Op2 = MI.getOperand(2);
+ MachineOperand Op4 = MI.getOperand(4);
+
+ if (Op2.isReg()) {
+ MI.getOperand(4).ChangeToRegister(
+ Op2.getReg(), Op2.isDef(), Op2.isImplicit(), Op2.isKill(), Op2.isDead(),
+ Op2.isUndef(), Op2.isDebug());
+ if (Op2.getSubReg() != AMDGPU::NoSubRegister)
+ MI.getOperand(4).setSubReg(Op2.getSubReg());
+ } else if (Op2.isImm()) {
+ MI.getOperand(4).ChangeToImmediate(Op2.getImm());
+ }
- if (*OldVCC != Reg) {
- MachineInstr *DefMI = MRI->getVRegDef(Reg);
- if (DefMI) {
- unsigned Opcode = getInverseCompareOpcode(*DefMI);
- if (Opcode &&
- SIShrinkInstructions::shouldSwitchOperands(*MRI, MI, *TII)) {
- auto cmpDL = DefMI->getDebugLoc();
- *NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
- *OldVCC = Reg;
- MachineInstrBuilder InverseCompare = BuildMI(
- *DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
- InverseCompare->setFlags(DefMI->getFlags());
-
- unsigned OpNum = DefMI->getNumExplicitOperands();
- for (unsigned i = 1; i < OpNum; i++) {
- MachineOperand Op = DefMI->getOperand(i);
- InverseCompare.add(Op);
- if (Op.isReg() && Op.isKill())
- InverseCompare->getOperand(i).setIsKill(false);
- }
- }
- }
+ if (Op4.isReg()) {
+ MI.getOperand(2).setReg(Op4.getReg());
+ if (Op4.getSubReg() != AMDGPU::NoSubRegister)
+ MI.getOperand(2).setSubReg(Op4.getSubReg());
+ } else if (Op4.isImm()) {
+ MI.getOperand(2).ChangeToImmediate(Op4.getImm());
}
- if (*OldVCC == Reg) {
- BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(3))
- .add(MI.getOperand(4))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .addReg(*NewVCC);
- MI.eraseFromParent();
- return true;
+
+ MachineOperand Op1 = MI.getOperand(1);
+ MachineOperand Op3 = MI.getOperand(3);
----------------
mbrkusanin wrote:
There is no need to copy the full operands here; only the immediate values are needed.
https://github.com/llvm/llvm-project/pull/135162
More information about the llvm-commits mailing list