[llvm] [AMDGPU][AMDGPUDemoteSCCBranchToExecz] demote s_cbranch_scc0/1 branches into vcmp + s_cbranch_execz branches (PR #110284)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 03:59:17 PDT 2024
Juan Manuel Martinez Caamaño <juamarti at amd.com>,
Juan Manuel Martinez Caamaño <juamarti at amd.com>,
Juan Manuel Martinez Caamaño <juamarti at amd.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/110284 at github.com>
================
@@ -0,0 +1,307 @@
+#include <llvm/CodeGen/MachineFunctionPass.h>
+
+#include "AMDGPU.h"
+#include "AMDGPUDemoteSCCBranchToExecz.h"
+#include "GCNSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+#define DEBUG_TYPE "amdgpu-demote-scc-to-execz"
+const char PassName[] = "AMDGPU s_cbranch_scc to s_cbranch_execz conversion";
+
+/// Translate the scalar compare opcode of \p MI into the VALU compare opcode
+/// (e64 encoding) that tests the same condition.
+///
+/// \param MI      Instruction whose opcode is translated.
+/// \param Reverse When true, the scalar opcode is first replaced by its
+///                logical negation (EQ <-> LG, GT <-> LE, GE <-> LT) and the
+///                negated comparison is translated instead.
+/// \returns the matching V_CMP_*_e64 opcode, or std::nullopt when the opcode
+///          of \p MI is not one of the S_CMP_* opcodes handled below.
+std::optional<unsigned> getVALUOpc(const MachineInstr &MI,
+                                   bool Reverse = false) {
+  unsigned Opc = MI.getOpcode();
+  if (Reverse) {
+    // Swap each comparison for the one that holds exactly when it fails.
+    switch (Opc) {
+    case AMDGPU::S_CMP_EQ_I32:
+      Opc = AMDGPU::S_CMP_LG_I32;
+      break;
+    case AMDGPU::S_CMP_LG_I32:
+      Opc = AMDGPU::S_CMP_EQ_I32;
+      break;
+    case AMDGPU::S_CMP_GT_I32:
+      Opc = AMDGPU::S_CMP_LE_I32;
+      break;
+    case AMDGPU::S_CMP_GE_I32:
+      Opc = AMDGPU::S_CMP_LT_I32;
+      break;
+    case AMDGPU::S_CMP_LT_I32:
+      Opc = AMDGPU::S_CMP_GE_I32;
+      break;
+    case AMDGPU::S_CMP_LE_I32:
+      Opc = AMDGPU::S_CMP_GT_I32;
+      break;
+    case AMDGPU::S_CMP_EQ_U32:
+      Opc = AMDGPU::S_CMP_LG_U32;
+      break;
+    case AMDGPU::S_CMP_LG_U32:
+      Opc = AMDGPU::S_CMP_EQ_U32;
+      break;
+    case AMDGPU::S_CMP_GT_U32:
+      Opc = AMDGPU::S_CMP_LE_U32;
+      break;
+    case AMDGPU::S_CMP_GE_U32:
+      Opc = AMDGPU::S_CMP_LT_U32;
+      break;
+    case AMDGPU::S_CMP_LT_U32:
+      Opc = AMDGPU::S_CMP_GE_U32;
+      break;
+    case AMDGPU::S_CMP_LE_U32:
+      Opc = AMDGPU::S_CMP_GT_U32;
+      break;
+    case AMDGPU::S_CMP_EQ_U64:
+      Opc = AMDGPU::S_CMP_LG_U64;
+      break;
+    case AMDGPU::S_CMP_LG_U64:
+      Opc = AMDGPU::S_CMP_EQ_U64;
+      break;
+    default:
+      return std::nullopt;
+    }
+  }
+
+  switch (Opc) {
+  case AMDGPU::S_CMP_EQ_I32:
+    return AMDGPU::V_CMP_EQ_I32_e64;
+  case AMDGPU::S_CMP_LG_I32:
+    // LG is "not equal": it must map to NE, like the U32/U64 cases below,
+    // not to LT.
+    return AMDGPU::V_CMP_NE_I32_e64;
+  case AMDGPU::S_CMP_GT_I32:
+    return AMDGPU::V_CMP_GT_I32_e64;
+  case AMDGPU::S_CMP_GE_I32:
+    return AMDGPU::V_CMP_GE_I32_e64;
+  case AMDGPU::S_CMP_LT_I32:
+    return AMDGPU::V_CMP_LT_I32_e64;
+  case AMDGPU::S_CMP_LE_I32:
+    return AMDGPU::V_CMP_LE_I32_e64;
+  case AMDGPU::S_CMP_EQ_U32:
+    return AMDGPU::V_CMP_EQ_U32_e64;
+  case AMDGPU::S_CMP_LG_U32:
+    return AMDGPU::V_CMP_NE_U32_e64;
+  case AMDGPU::S_CMP_GT_U32:
+    return AMDGPU::V_CMP_GT_U32_e64;
+  case AMDGPU::S_CMP_GE_U32:
+    return AMDGPU::V_CMP_GE_U32_e64;
+  case AMDGPU::S_CMP_LT_U32:
+    return AMDGPU::V_CMP_LT_U32_e64;
+  case AMDGPU::S_CMP_LE_U32:
+    return AMDGPU::V_CMP_LE_U32_e64;
+  case AMDGPU::S_CMP_EQ_U64:
+    return AMDGPU::V_CMP_EQ_U64_e64;
+  case AMDGPU::S_CMP_LG_U64:
+    return AMDGPU::V_CMP_NE_U64_e64;
+  default:
+    return std::nullopt;
+  }
+}
+
+/// Report whether getVALUOpc() yields a VALU compare opcode for \p MI
+/// (queried without reversing the condition).
+bool isSCmpPromotableToVCmp(const MachineInstr &MI) {
+  const std::optional<unsigned> VALUOpc = getVALUOpc(MI);
+  return VALUOpc != std::nullopt;
+}
+
+/// Check whether \p Head, together with its two successors, forms a triangle:
+/// one successor (\p Then) has \p Head as its only predecessor and a single
+/// successor, and that single successor is the other successor of \p Head
+/// (\p Tail).
+///
+/// \param Head Block whose successors are inspected.
+/// \param Then Out: set to the candidate block reached only from \p Head.
+/// \param Tail Out: set to the other successor of \p Head.
+/// \returns true when the shape described above holds. \p Then and \p Tail
+///          are left untouched only when \p Head does not have exactly two
+///          successors; otherwise they are overwritten even on a false
+///          return.
+bool isTriangular(MachineBasicBlock &Head, MachineBasicBlock *&Then,
+                  MachineBasicBlock *&Tail) {
+  if (Head.succ_size() != 2)
+    return false;
+
+  auto Succs = Head.succ_begin();
+  Then = Succs[0];
+  Tail = Succs[1];
+
+  // Canonicalize so that Then is the successor with a single predecessor; if
+  // the first successor does not qualify, try the other one.
+  if (Then->pred_size() != 1)
+    std::swap(Then, Tail);
+
+  // After the swap Then may still be shared with other predecessors, so the
+  // predecessor count is checked again before looking at its successor.
+  return Then->pred_size() == 1 && Then->succ_size() == 1 &&
+         *Then->succ_begin() == Tail;
+}
+
+bool hasPromotableCmpConditon(MachineInstr &Term, MachineInstr *&Cmp) {
+ auto CmpIt = std::next(Term.getReverseIterator());
+ if (CmpIt == Term.getParent()->instr_rend())
+ return false;
+
----------------
arsenm wrote:
You need to defend against the case where SCC has other uses (i.e. it's live out)
https://github.com/llvm/llvm-project/pull/110284
More information about the llvm-commits
mailing list