[llvm] [AMDGPU][AMDGPUDemoteSCCBranchToExecz] demote s_cbranch_scc0/1 branches into vcmp + s_cbranch_execz branches (PR #110284)
Juan Manuel Martinez Caamaño via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 08:20:06 PDT 2024
================
@@ -0,0 +1,251 @@
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+#include "AMDGPU.h"
+#include "AMDGPUDemoteSCCBranchToExecz.h"
+#include "GCNSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+#define DEBUG_TYPE "amdgpu-demote-scc-to-execz"
+const char PassName[] = "AMDGPU s_cbranch_scc to s_cbranch_execz conversion";
+
+/// Translate the opcode of the scalar compare \p MI into a V_CMP_*_e64 opcode.
+///
+/// \param MI      Instruction whose opcode is looked up.
+/// \param Reverse When true, return the vector compare paired with the
+///                opposite scalar compare instead (e.g. V_CMP_NE_I32_e64 for
+///                S_CMP_EQ_I32).
+/// \returns The selected vector compare opcode, or std::nullopt when the
+///          opcode of \p MI is not one of the scalar compares listed below.
+std::optional<unsigned> getVALUOpc(const MachineInstr &MI,
+                                   bool Reverse = false) {
+  // One row per scalar compare / opposite scalar compare pair, together with
+  // the vector compares that correspond to each of them.
+  struct CmpMapping {
+    unsigned SOpc;
+    unsigned SOppositeOpc;
+    unsigned VOpc;
+    unsigned VOppositeOpc;
+  };
+  static constexpr CmpMapping Mappings[] = {
+      {AMDGPU::S_CMP_EQ_I32, AMDGPU::S_CMP_LG_I32, AMDGPU::V_CMP_EQ_I32_e64,
+       AMDGPU::V_CMP_NE_I32_e64},
+      {AMDGPU::S_CMP_EQ_U32, AMDGPU::S_CMP_LG_U32, AMDGPU::V_CMP_EQ_U32_e64,
+       AMDGPU::V_CMP_NE_U32_e64},
+      {AMDGPU::S_CMP_GT_I32, AMDGPU::S_CMP_LE_I32, AMDGPU::V_CMP_GT_I32_e64,
+       AMDGPU::V_CMP_LE_I32_e64},
+      {AMDGPU::S_CMP_GT_U32, AMDGPU::S_CMP_LE_U32, AMDGPU::V_CMP_GT_U32_e64,
+       AMDGPU::V_CMP_LE_U32_e64},
+      {AMDGPU::S_CMP_GE_I32, AMDGPU::S_CMP_LT_I32, AMDGPU::V_CMP_GE_I32_e64,
+       AMDGPU::V_CMP_LT_I32_e64},
+      {AMDGPU::S_CMP_GE_U32, AMDGPU::S_CMP_LT_U32, AMDGPU::V_CMP_GE_U32_e64,
+       AMDGPU::V_CMP_LT_U32_e64},
+      {AMDGPU::S_CMP_EQ_U64, AMDGPU::S_CMP_LG_U64, AMDGPU::V_CMP_EQ_U64_e64,
+       AMDGPU::V_CMP_NE_U64_e64},
+  };
+
+  const unsigned ScalarOpc = MI.getOpcode();
+  for (const CmpMapping &Row : Mappings) {
+    if (ScalarOpc == Row.SOpc)
+      return Reverse ? Row.VOppositeOpc : Row.VOpc;
+    if (ScalarOpc == Row.SOppositeOpc)
+      return Reverse ? Row.VOpc : Row.VOppositeOpc;
+  }
+
+  // Not a scalar compare this table knows how to translate.
+  return std::nullopt;
+}
+
+/// Returns true when getVALUOpc() yields a vector compare opcode for \p MI,
+/// i.e. when the instruction is one of the scalar compares it can translate.
+bool isSCmpPromotableToVCmp(const MachineInstr &MI) {
+  const std::optional<unsigned> VectorOpc = getVALUOpc(MI);
+  return VectorOpc != std::nullopt;
+}
+
+/// Checks whether \p Head, \p Then and \p Tail form a triangle: \p Head has
+/// exactly two successors, one of them (\p Then) has a single predecessor and
+/// a single successor, and that successor is the other one (\p Tail).
+///
+/// \param Head       Block whose successors are inspected.
+/// \param [out] Then Successor chosen as the side of the triangle.
+/// \param [out] Tail The remaining successor of \p Head.
+/// \returns true if the triangle shape is matched.
+///
+/// \p Then and \p Tail are left untouched when \p Head does not have exactly
+/// two successors; otherwise they are written even if the function returns
+/// false.
+bool isTriangular(MachineBasicBlock &Head, MachineBasicBlock *&Then,
+                  MachineBasicBlock *&Tail) {
+  if (Head.succ_size() != 2)
+    return false;
+
+  auto SuccIt = Head.succ_begin();
+  MachineBasicBlock *FirstSucc = *SuccIt;
+  MachineBasicBlock *SecondSucc = *std::next(SuccIt);
+
+  // Prefer the first successor as Then when it has a single predecessor;
+  // otherwise fall back to the second successor.
+  const bool FirstHasSinglePred = FirstSucc->pred_size() == 1;
+  Then = FirstHasSinglePred ? FirstSucc : SecondSucc;
+  Tail = FirstHasSinglePred ? SecondSucc : FirstSucc;
+
+  // Then must be entered from a single block and leave to a single block...
+  const bool ThenIsSingleEntrySingleExit =
+      Then->pred_size() == 1 && Then->succ_size() == 1;
+
+  // ...and that single exit has to be Tail.
+  return ThenIsSingleEntrySingleExit && *Then->succ_begin() == Tail;
+}
+
+/// Checks whether the instruction immediately before \p Term in its parent
+/// block is a scalar compare accepted by isSCmpPromotableToVCmp().
+///
+/// \param Term      Instruction whose predecessor is inspected.
+/// \param [out] Cmp Set to the preceding instruction on success; left
+///                  untouched otherwise.
+/// \returns true if such a compare was found.
+bool hasPromotableCmpConditon(MachineInstr &Term, MachineInstr *&Cmp) {
+  // Step one instruction backwards from Term.
+  auto PrevIt = std::next(Term.getReverseIterator());
+  const bool HasPrevInstr = PrevIt != Term.getParent()->instr_rend();
+
+  if (HasPrevInstr && isSCmpPromotableToVCmp(*PrevIt)) {
+    Cmp = &*PrevIt;
+    return true;
+  }
+
+  // Either Term is the first instruction of its block, or the instruction
+  // before it is not a promotable compare.
+  return false;
+}
+
+/// Checks whether the instruction returned by
+/// \p Head.getFirstInstrTerminator() is an S_CBRANCH_SCC0 or S_CBRANCH_SCC1.
+///
+/// \param Head       Block to inspect.
+/// \param [out] Term Set to that branch on success; left untouched otherwise.
+/// \returns true if the block has such a terminator.
+bool hasCbranchSCCTerm(MachineBasicBlock &Head, MachineInstr *&Term) {
+  auto FirstTermIt = Head.getFirstInstrTerminator();
+  if (FirstTermIt == Head.end())
+    return false;
+
+  const unsigned TermOpc = FirstTermIt->getOpcode();
+  const bool IsSCCBranch =
+      TermOpc == AMDGPU::S_CBRANCH_SCC0 || TermOpc == AMDGPU::S_CBRANCH_SCC1;
+  if (!IsSCCBranch)
+    return false;
+
+  Term = &*FirstTermIt;
+  return true;
+}
+
+bool isTriangularSCCBranch(MachineBasicBlock &Head, MachineInstr *&Term,
+ MachineInstr *&Cmp, MachineBasicBlock *&Then,
+ MachineBasicBlock *&Tail) {
+
+ if (!hasCbranchSCCTerm(Head, Term))
+ return false;
+
+ bool SCCIsUsedOutsideHead = any_of(
+ Head.liveouts(), [](const auto &P) { return P.PhysReg == AMDGPU::SCC; });
----------------
jmmartinez wrote:
Add profitable test case where this condition is `true`.
https://github.com/llvm/llvm-project/pull/110284
More information about the llvm-commits
mailing list