[llvm] [AMDGPU] Swap V_CNDMASK operands to shrink it into VOP2 (PR #135162)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 30 06:36:17 PDT 2025
================
@@ -831,6 +835,215 @@ bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
return true;
}
+unsigned SIShrinkInstructions::getInverseCompareOpcode(MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ // int 32
+ case AMDGPU::V_CMP_EQ_I32_e64:
+ return AMDGPU::V_CMP_NE_I32_e64;
+ case AMDGPU::V_CMP_NE_I32_e64:
+ return AMDGPU::V_CMP_EQ_I32_e64;
+ case AMDGPU::V_CMP_GE_I32_e64:
+ return AMDGPU::V_CMP_LT_I32_e64;
+ case AMDGPU::V_CMP_LE_I32_e64:
+ return AMDGPU::V_CMP_GT_I32_e64;
+ case AMDGPU::V_CMP_GT_I32_e64:
+ return AMDGPU::V_CMP_LE_I32_e64;
+ case AMDGPU::V_CMP_LT_I32_e64:
+ return AMDGPU::V_CMP_GE_I32_e64;
+ // int 64
+ case AMDGPU::V_CMP_EQ_I64_e64:
+ return AMDGPU::V_CMP_NE_I64_e64;
+ case AMDGPU::V_CMP_NE_I64_e64:
+ return AMDGPU::V_CMP_EQ_I64_e64;
+ case AMDGPU::V_CMP_GE_I64_e64:
+ return AMDGPU::V_CMP_LT_I64_e64;
+ case AMDGPU::V_CMP_LE_I64_e64:
+ return AMDGPU::V_CMP_GT_I64_e64;
+ case AMDGPU::V_CMP_GT_I64_e64:
+ return AMDGPU::V_CMP_LE_I64_e64;
+ case AMDGPU::V_CMP_LT_I64_e64:
+ return AMDGPU::V_CMP_GE_I64_e64;
+ // unsigned 32
+ case AMDGPU::V_CMP_EQ_U32_e64:
+ return AMDGPU::V_CMP_NE_U32_e64;
+ case AMDGPU::V_CMP_NE_U32_e64:
+ return AMDGPU::V_CMP_EQ_U32_e64;
+ case AMDGPU::V_CMP_GE_U32_e64:
+ return AMDGPU::V_CMP_LT_U32_e64;
+ case AMDGPU::V_CMP_LE_U32_e64:
+ return AMDGPU::V_CMP_GT_U32_e64;
+ case AMDGPU::V_CMP_GT_U32_e64:
+ return AMDGPU::V_CMP_LE_U32_e64;
+ case AMDGPU::V_CMP_LT_U32_e64:
+ return AMDGPU::V_CMP_GE_U32_e64;
+ // unsigned 64
+ case AMDGPU::V_CMP_EQ_U64_e64:
+ return AMDGPU::V_CMP_NE_U64_e64;
+ case AMDGPU::V_CMP_NE_U64_e64:
+ return AMDGPU::V_CMP_EQ_U64_e64;
+ case AMDGPU::V_CMP_GE_U64_e64:
+ return AMDGPU::V_CMP_LT_U64_e64;
+ case AMDGPU::V_CMP_LE_U64_e64:
+ return AMDGPU::V_CMP_GT_U64_e64;
+ case AMDGPU::V_CMP_GT_U64_e64:
+ return AMDGPU::V_CMP_LE_U64_e64;
+ case AMDGPU::V_CMP_LT_U64_e64:
+ return AMDGPU::V_CMP_GE_U64_e64;
+ // float 32
+ case AMDGPU::V_CMP_EQ_F32_e64:
+ return AMDGPU::V_CMP_NEQ_F32_e64;
+ case AMDGPU::V_CMP_NEQ_F32_e64:
+ return AMDGPU::V_CMP_EQ_F32_e64;
+ case AMDGPU::V_CMP_GE_F32_e64:
+ return AMDGPU::V_CMP_NGE_F32_e64;
+ case AMDGPU::V_CMP_NGE_F32_e64:
+ return AMDGPU::V_CMP_GE_F32_e64;
+ case AMDGPU::V_CMP_LE_F32_e64:
+ return AMDGPU::V_CMP_NLE_F32_e64;
+ case AMDGPU::V_CMP_NLE_F32_e64:
+ return AMDGPU::V_CMP_LE_F32_e64;
+ case AMDGPU::V_CMP_GT_F32_e64:
+ return AMDGPU::V_CMP_NGT_F32_e64;
+ case AMDGPU::V_CMP_NGT_F32_e64:
+ return AMDGPU::V_CMP_GT_F32_e64;
+ case AMDGPU::V_CMP_LT_F32_e64:
+ return AMDGPU::V_CMP_NLT_F32_e64;
+ case AMDGPU::V_CMP_NLT_F32_e64:
+ return AMDGPU::V_CMP_LT_F32_e64;
+ case AMDGPU::V_CMP_LG_F32_e64:
+ return AMDGPU::V_CMP_NLG_F32_e64;
+ case AMDGPU::V_CMP_NLG_F32_e64:
+ return AMDGPU::V_CMP_LG_F32_e64;
+ case AMDGPU::V_CMP_O_F32_e64:
+ return AMDGPU::V_CMP_U_F32_e64;
+ case AMDGPU::V_CMP_U_F32_e64:
+ return AMDGPU::V_CMP_O_F32_e64;
+ // float 64
+ case AMDGPU::V_CMP_EQ_F64_e64:
+ return AMDGPU::V_CMP_NEQ_F64_e64;
+ case AMDGPU::V_CMP_NEQ_F64_e64:
+ return AMDGPU::V_CMP_EQ_F64_e64;
+ case AMDGPU::V_CMP_GE_F64_e64:
+ return AMDGPU::V_CMP_NGE_F64_e64;
+ case AMDGPU::V_CMP_NGE_F64_e64:
+ return AMDGPU::V_CMP_GE_F64_e64;
+ case AMDGPU::V_CMP_LE_F64_e64:
+ return AMDGPU::V_CMP_NLE_F64_e64;
+ case AMDGPU::V_CMP_NLE_F64_e64:
+ return AMDGPU::V_CMP_LE_F64_e64;
+ case AMDGPU::V_CMP_GT_F64_e64:
+ return AMDGPU::V_CMP_NGT_F64_e64;
+ case AMDGPU::V_CMP_NGT_F64_e64:
+ return AMDGPU::V_CMP_GT_F64_e64;
+ case AMDGPU::V_CMP_LT_F64_e64:
+ return AMDGPU::V_CMP_NLT_F64_e64;
+ case AMDGPU::V_CMP_NLT_F64_e64:
+ return AMDGPU::V_CMP_LT_F64_e64;
+ case AMDGPU::V_CMP_LG_F64_e64:
+ return AMDGPU::V_CMP_NLG_F64_e64;
+ case AMDGPU::V_CMP_NLG_F64_e64:
+ return AMDGPU::V_CMP_LG_F64_e64;
+ case AMDGPU::V_CMP_O_F64_e64:
+ return AMDGPU::V_CMP_U_F64_e64;
+ case AMDGPU::V_CMP_U_F64_e64:
+ return AMDGPU::V_CMP_O_F64_e64;
+ default:
+ return 0;
+ }
+}
+
+bool SIShrinkInstructions::shouldSwapCndOperands(
+ Register Reg, std::vector<MachineInstr *> &UsesToProcess) const {
+ auto AllUses = MRI->use_nodbg_instructions(Reg);
+ int InstsToSwap = 0;
+
+ for (auto &UseInst : AllUses) {
----------------
mbrkusanin wrote:
The decision to swap operands is made either for all instructions or for none, since the original cmp instruction that defines vcc is also changed. You cannot really avoid looking at all uses.
Performing the fold from the cndmask use instead would be inconvenient, because you would have to track whether that specific vcc value had already been analyzed.
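For illustration, here is a minimal sketch of the all-or-none vote described above. This is not the PR's actual code: isSwappableCndMask is a hypothetical stand-in for the real per-use profitability check.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Hypothetical predicate: true if this use is a V_CNDMASK that would
// shrink to VOP2 once the compare is inverted and its operands swapped.
static bool isSwappableCndMask(const MachineInstr &UseInst);

static bool shouldSwapAllUses(Register Reg, const MachineRegisterInfo &MRI) {
  int InstsToSwap = 0;
  for (MachineInstr &UseInst : MRI.use_nodbg_instructions(Reg)) {
    if (isSwappableCndMask(UseInst))
      ++InstsToSwap; // this use shrinks after the swap
    else
      --InstsToSwap; // this use would instead need an extra fix-up
  }
  // Swap only when the net effect across all uses is a win; the
  // vcc-defining compare is then rewritten via getInverseCompareOpcode().
  return InstsToSwap > 0;
}

Because the single compare that defines vcc is inverted once, every cndmask reading that vcc has to be swapped together, which is why the heuristic can only vote across all uses at once.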
https://github.com/llvm/llvm-project/pull/135162