[llvm] [AMDGPU][AMDGPUDemoteSCCBranchToExecz] demote s_cbranch_scc0/1 branches into vcmp + s_cbranch_execz branches (PR #110284)
Juan Manuel Martinez Caamaño via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 08:20:06 PDT 2024
================
@@ -0,0 +1,307 @@
+#include <llvm/CodeGen/MachineFunctionPass.h>
+
+#include "AMDGPU.h"
+#include "AMDGPUDemoteSCCBranchToExecz.h"
+#include "GCNSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+#define DEBUG_TYPE "amdgpu-demote-scc-to-execz"
+const char PassName[] = "AMDGPU s_cbranch_scc to s_cbranch_execz conversion";
+
+/// Translate the scalar compare opcode of \p MI into a vector (VALU, e64)
+/// compare opcode.
+///
+/// \param MI      Instruction whose opcode is looked up.
+/// \param Reverse When true, the scalar opcode is first replaced by the scalar
+///                compare testing the opposite condition (EQ <-> LG,
+///                GT <-> LE, GE <-> LT) and that opcode is translated instead.
+/// \returns The VALU compare opcode, or std::nullopt when the opcode of \p MI
+///          is not one of the S_CMP_* opcodes listed below.
+std::optional<unsigned> getVALUOpc(const MachineInstr &MI,
+                                   bool Reverse = false) {
+  unsigned Opc = MI.getOpcode();
+  if (Reverse) {
+    // Swap the scalar compare for its logical negation. For 64-bit operands
+    // only EQ/LG are listed.
+    switch (Opc) {
+    case AMDGPU::S_CMP_EQ_I32:
+      Opc = AMDGPU::S_CMP_LG_I32;
+      break;
+    case AMDGPU::S_CMP_LG_I32:
+      Opc = AMDGPU::S_CMP_EQ_I32;
+      break;
+    case AMDGPU::S_CMP_GT_I32:
+      Opc = AMDGPU::S_CMP_LE_I32;
+      break;
+    case AMDGPU::S_CMP_GE_I32:
+      Opc = AMDGPU::S_CMP_LT_I32;
+      break;
+    case AMDGPU::S_CMP_LT_I32:
+      Opc = AMDGPU::S_CMP_GE_I32;
+      break;
+    case AMDGPU::S_CMP_LE_I32:
+      Opc = AMDGPU::S_CMP_GT_I32;
+      break;
+    case AMDGPU::S_CMP_EQ_U32:
+      Opc = AMDGPU::S_CMP_LG_U32;
+      break;
+    case AMDGPU::S_CMP_LG_U32:
+      Opc = AMDGPU::S_CMP_EQ_U32;
+      break;
+    case AMDGPU::S_CMP_GT_U32:
+      Opc = AMDGPU::S_CMP_LE_U32;
+      break;
+    case AMDGPU::S_CMP_GE_U32:
+      Opc = AMDGPU::S_CMP_LT_U32;
+      break;
+    case AMDGPU::S_CMP_LT_U32:
+      Opc = AMDGPU::S_CMP_GE_U32;
+      break;
+    case AMDGPU::S_CMP_LE_U32:
+      Opc = AMDGPU::S_CMP_GT_U32;
+      break;
+    case AMDGPU::S_CMP_EQ_U64:
+      Opc = AMDGPU::S_CMP_LG_U64;
+      break;
+    case AMDGPU::S_CMP_LG_U64:
+      Opc = AMDGPU::S_CMP_EQ_U64;
+      break;
+    default:
+      return std::nullopt;
+    }
+  }
+
+  // Map the (possibly reversed) scalar compare onto the VALU compare with the
+  // same predicate and operand type.
+  switch (Opc) {
+  case AMDGPU::S_CMP_EQ_I32:
+    return AMDGPU::V_CMP_EQ_I32_e64;
+  case AMDGPU::S_CMP_LG_I32:
+    // LG ("less or greater") is "not equal": it must map to NE, matching the
+    // U32/U64 entries below, and not to LT.
+    return AMDGPU::V_CMP_NE_I32_e64;
+  case AMDGPU::S_CMP_GT_I32:
+    return AMDGPU::V_CMP_GT_I32_e64;
+  case AMDGPU::S_CMP_GE_I32:
+    return AMDGPU::V_CMP_GE_I32_e64;
+  case AMDGPU::S_CMP_LT_I32:
+    return AMDGPU::V_CMP_LT_I32_e64;
+  case AMDGPU::S_CMP_LE_I32:
+    return AMDGPU::V_CMP_LE_I32_e64;
+  case AMDGPU::S_CMP_EQ_U32:
+    return AMDGPU::V_CMP_EQ_U32_e64;
+  case AMDGPU::S_CMP_LG_U32:
+    return AMDGPU::V_CMP_NE_U32_e64;
+  case AMDGPU::S_CMP_GT_U32:
+    return AMDGPU::V_CMP_GT_U32_e64;
+  case AMDGPU::S_CMP_GE_U32:
+    return AMDGPU::V_CMP_GE_U32_e64;
+  case AMDGPU::S_CMP_LT_U32:
+    return AMDGPU::V_CMP_LT_U32_e64;
+  case AMDGPU::S_CMP_LE_U32:
+    return AMDGPU::V_CMP_LE_U32_e64;
+  case AMDGPU::S_CMP_EQ_U64:
+    return AMDGPU::V_CMP_EQ_U64_e64;
+  case AMDGPU::S_CMP_LG_U64:
+    return AMDGPU::V_CMP_NE_U64_e64;
+  default:
+    return std::nullopt;
+  }
+}
+
+/// Reports whether the opcode of \p MI is one that getVALUOpc can translate
+/// (using the non-reversed lookup).
+bool isSCmpPromotableToVCmp(const MachineInstr &MI) {
+  const std::optional<unsigned> VALUOpc = getVALUOpc(MI);
+  return VALUOpc != std::nullopt;
+}
+
+/// Checks whether \p Head is the top of a "triangle": Head has exactly two
+/// successors, one of which (\p Then) has a single predecessor and a single
+/// successor, and that successor is Head's other successor (\p Tail).
+///
+/// \p Then and \p Tail are written whenever Head has exactly two successors,
+/// even if the function ends up returning false; they are left untouched
+/// otherwise.
+bool isTriangular(MachineBasicBlock &Head, MachineBasicBlock *&Then,
+                  MachineBasicBlock *&Tail) {
+  if (Head.succ_size() != 2)
+    return false;
+
+  auto SuccIt = Head.succ_begin();
+  MachineBasicBlock *FirstSucc = SuccIt[0];
+  MachineBasicBlock *SecondSucc = SuccIt[1];
+
+  // Prefer the first successor as Then when it has a single predecessor;
+  // otherwise try the roles the other way around.
+  const bool FirstHasSinglePred = FirstSucc->pred_size() == 1;
+  Then = FirstHasSinglePred ? FirstSucc : SecondSucc;
+  Tail = FirstHasSinglePred ? SecondSucc : FirstSucc;
+
+  // Then must be entered from one block only and fall through to Tail only.
+  const bool ThenIsSingleEntryExit =
+      Then->pred_size() == 1 && Then->succ_size() == 1;
+  return ThenIsSingleEntryExit && *Then->succ_begin() == Tail;
+}
+
+bool hasPromotableCmpConditon(MachineInstr &Term, MachineInstr *&Cmp) {
+ auto CmpIt = std::next(Term.getReverseIterator());
+ if (CmpIt == Term.getParent()->instr_rend())
+ return false;
+
----------------
jmmartinez wrote:
I've added the condition:
```cpp
bool SCCIsUsedOutsideHead = any_of(
Head.liveouts(), [](const auto &P) { return P.PhysReg == AMDGPU::SCC; });
if (SCCIsUsedOutsideHead)
return false;
```
I'm not sure if there is a better way to do this.
https://github.com/llvm/llvm-project/pull/110284
More information about the llvm-commits
mailing list