[llvm] [AMDGPU] Swap V_CNDMASK operands to shrink it into VOP2 (PR #135162)
Mirko BrkuĊĦanin via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 28 06:49:58 PDT 2025
================
@@ -831,6 +836,213 @@ bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
return true;
}
+unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ // int 32
+ case AMDGPU::V_CMP_EQ_I32_e64:
+ return AMDGPU::V_CMP_NE_I32_e64;
+ case AMDGPU::V_CMP_NE_I32_e64:
+ return AMDGPU::V_CMP_EQ_I32_e64;
+ case AMDGPU::V_CMP_GE_I32_e64:
+ return AMDGPU::V_CMP_LT_I32_e64;
+ case AMDGPU::V_CMP_LE_I32_e64:
+ return AMDGPU::V_CMP_GT_I32_e64;
+ case AMDGPU::V_CMP_GT_I32_e64:
+ return AMDGPU::V_CMP_LE_I32_e64;
+ case AMDGPU::V_CMP_LT_I32_e64:
+ return AMDGPU::V_CMP_GE_I32_e64;
+ // int 64
+ case AMDGPU::V_CMP_EQ_I64_e64:
+ return AMDGPU::V_CMP_NE_I64_e64;
+ case AMDGPU::V_CMP_NE_I64_e64:
+ return AMDGPU::V_CMP_EQ_I64_e64;
+ case AMDGPU::V_CMP_GE_I64_e64:
+ return AMDGPU::V_CMP_LT_I64_e64;
+ case AMDGPU::V_CMP_LE_I64_e64:
+ return AMDGPU::V_CMP_GT_I64_e64;
+ case AMDGPU::V_CMP_GT_I64_e64:
+ return AMDGPU::V_CMP_LE_I64_e64;
+ case AMDGPU::V_CMP_LT_I64_e64:
+ return AMDGPU::V_CMP_GE_I64_e64;
+ // unsigned 32
+ case AMDGPU::V_CMP_EQ_U32_e64:
+ return AMDGPU::V_CMP_NE_U32_e64;
+ case AMDGPU::V_CMP_NE_U32_e64:
+ return AMDGPU::V_CMP_EQ_U32_e64;
+ case AMDGPU::V_CMP_GE_U32_e64:
+ return AMDGPU::V_CMP_LT_U32_e64;
+ case AMDGPU::V_CMP_LE_U32_e64:
+ return AMDGPU::V_CMP_GT_U32_e64;
+ case AMDGPU::V_CMP_GT_U32_e64:
+ return AMDGPU::V_CMP_LE_U32_e64;
+ case AMDGPU::V_CMP_LT_U32_e64:
+ return AMDGPU::V_CMP_GE_U32_e64;
+ // unsigned 64
+ case AMDGPU::V_CMP_EQ_U64_e64:
+ return AMDGPU::V_CMP_NE_U64_e64;
+ case AMDGPU::V_CMP_NE_U64_e64:
+ return AMDGPU::V_CMP_EQ_U64_e64;
+ case AMDGPU::V_CMP_GE_U64_e64:
+ return AMDGPU::V_CMP_LT_U64_e64;
+ case AMDGPU::V_CMP_LE_U64_e64:
+ return AMDGPU::V_CMP_GT_U64_e64;
+ case AMDGPU::V_CMP_GT_U64_e64:
+ return AMDGPU::V_CMP_LE_U64_e64;
+ case AMDGPU::V_CMP_LT_U64_e64:
+ return AMDGPU::V_CMP_GE_U64_e64;
+ // float 32
+ case AMDGPU::V_CMP_EQ_F32_e64:
+ return AMDGPU::V_CMP_NEQ_F32_e64;
+ case AMDGPU::V_CMP_NEQ_F32_e64:
+ return AMDGPU::V_CMP_EQ_F32_e64;
+ case AMDGPU::V_CMP_GE_F32_e64:
+ return AMDGPU::V_CMP_NGE_F32_e64;
+ case AMDGPU::V_CMP_NGE_F32_e64:
+ return AMDGPU::V_CMP_GE_F32_e64;
+ case AMDGPU::V_CMP_LE_F32_e64:
+ return AMDGPU::V_CMP_NLE_F32_e64;
+ case AMDGPU::V_CMP_NLE_F32_e64:
+ return AMDGPU::V_CMP_LE_F32_e64;
+ case AMDGPU::V_CMP_GT_F32_e64:
+ return AMDGPU::V_CMP_NGT_F32_e64;
+ case AMDGPU::V_CMP_NGT_F32_e64:
+ return AMDGPU::V_CMP_GT_F32_e64;
+ case AMDGPU::V_CMP_LT_F32_e64:
+ return AMDGPU::V_CMP_NLT_F32_e64;
+ case AMDGPU::V_CMP_NLT_F32_e64:
+ return AMDGPU::V_CMP_LT_F32_e64;
+ case AMDGPU::V_CMP_LG_F32_e64:
+ return AMDGPU::V_CMP_NLG_F32_e64;
+ case AMDGPU::V_CMP_NLG_F32_e64:
+ return AMDGPU::V_CMP_LG_F32_e64;
+ case AMDGPU::V_CMP_O_F32_e64:
+ return AMDGPU::V_CMP_U_F32_e64;
+ case AMDGPU::V_CMP_U_F32_e64:
+ return AMDGPU::V_CMP_O_F32_e64;
+ // float 64
+ case AMDGPU::V_CMP_EQ_F64_e64:
+ return AMDGPU::V_CMP_NEQ_F64_e64;
+ case AMDGPU::V_CMP_NEQ_F64_e64:
+ return AMDGPU::V_CMP_EQ_F64_e64;
+ case AMDGPU::V_CMP_GE_F64_e64:
+ return AMDGPU::V_CMP_NGE_F64_e64;
+ case AMDGPU::V_CMP_NGE_F64_e64:
+ return AMDGPU::V_CMP_GE_F64_e64;
+ case AMDGPU::V_CMP_LE_F64_e64:
+ return AMDGPU::V_CMP_NLE_F64_e64;
+ case AMDGPU::V_CMP_NLE_F64_e64:
+ return AMDGPU::V_CMP_LE_F64_e64;
+ case AMDGPU::V_CMP_GT_F64_e64:
+ return AMDGPU::V_CMP_NGT_F64_e64;
+ case AMDGPU::V_CMP_NGT_F64_e64:
+ return AMDGPU::V_CMP_GT_F32_e64;
+ case AMDGPU::V_CMP_LT_F64_e64:
+ return AMDGPU::V_CMP_NLT_F64_e64;
+ case AMDGPU::V_CMP_NLT_F64_e64:
+ return AMDGPU::V_CMP_LT_F64_e64;
+ case AMDGPU::V_CMP_LG_F64_e64:
+ return AMDGPU::V_CMP_NLG_F64_e64;
+ case AMDGPU::V_CMP_NLG_F64_e64:
+ return AMDGPU::V_CMP_LG_F64_e64;
+ case AMDGPU::V_CMP_O_F64_e64:
+ return AMDGPU::V_CMP_U_F64_e64;
+ case AMDGPU::V_CMP_U_F64_e64:
+ return AMDGPU::V_CMP_O_F64_e64;
+ default:
+ return 0;
+ }
+}
+
+bool SIShrinkInstructions::shouldSwapCndOperands(
+ MachineInstr &MI, SmallVector<MachineOperand *, 4> &UsesToProcess) const {
+ auto AllUses = MRI->use_nodbg_operands(MI.getOperand(0).getReg());
+ int InstsToSwap = 0;
+
+ for (auto &Use : AllUses) {
+ MachineInstr *UseInst = Use.getParent();
+ if (UseInst->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+ return false;
+
+ UsesToProcess.push_back(&Use);
+
+ MachineOperand &Src0 = UseInst->getOperand(2);
+ MachineOperand &Src1 = UseInst->getOperand(4);
+
+ bool Src0Imm = Src0.isImm();
+ bool Src1Imm = Src1.isImm();
+
+ if (!Src1Imm && Src0Imm)
+ InstsToSwap--;
+ else if (Src1Imm && !Src0Imm &&
+ UseInst->getOperand(1).getImm() == SISrcMods::NONE &&
+ TRI->isVGPR(*MRI, Src0.getReg()))
+ InstsToSwap++;
----------------
mbrkusanin wrote:
This is not accurate when src0 is immediate and src1 is either sgpr or vgpr with source modifiers.
Also it is not accurate when either of the src operands is an sgpr and the other one is a vgpr without source modifiers.
https://github.com/llvm/llvm-project/pull/135162
More information about the llvm-commits
mailing list