[llvm] [AArch64] Add SME peephole optimizer pass (PR #104612)

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 16 11:40:58 PDT 2024


================
@@ -0,0 +1,216 @@
+//===- SMEPeepholeOpt.cpp - SME peephole optimization pass ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This pass tries to remove back-to-back (smstart, smstop) and
+// (smstop, smstart) sequences. The pass is conservative when it cannot
+// determine that it is safe to remove these sequences.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64SMEAttributes.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-sme-peephole-opt"
+
+namespace {
+
+// Machine function pass that removes redundant back-to-back smstart/smstop
+// (and smstop/smstart) pairs within a single basic block.
+struct SMEPeepholeOpt : public MachineFunctionPass {
+  // Pass identification; the address of ID is used as the unique pass ID.
+  static char ID;
+
+  SMEPeepholeOpt() : MachineFunctionPass(ID) {
+    initializeSMEPeepholeOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "SME Peephole Optimization pass";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // Only instructions inside blocks are removed; the CFG is never changed.
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  // Removes cancelling start/stop pairs in \p MBB. Returns true if anything
+  // was changed. \p HasRemainingSMChange is set when a streaming-mode
+  // (pstate.sm) change survives in the block after optimization.
+  bool optimizeStartStopPairs(MachineBasicBlock &MBB,
+                              bool &HasRemainingSMChange) const;
+};
+
+char SMEPeepholeOpt::ID = 0;
+
+} // end anonymous namespace
+
+// A conditional streaming-mode change is emitted as the MSRpstatePseudo
+// opcode; the unconditional form uses MSRpstatesvcrImm1 (see the assert in
+// ChangesStreamingMode for the two expected opcodes).
+static bool isConditionalStartStop(const MachineInstr *MI) {
+  return MI->getOpcode() == AArch64::MSRpstatePseudo;
+}
+
+// Returns true if \p MI1 and \p MI2 form a cancelling pair: one starts and
+// the other stops the same state (SM, ZA, or SMZA), with identical
+// conditions for the conditional (MSRpstatePseudo) form.
+static bool isMatchingStartStopPair(const MachineInstr *MI1,
+                                    const MachineInstr *MI2) {
+  // We only consider the same type of streaming mode change here, i.e.
+  // start/stop SM, or start/stop ZA pairs.
+  // Operand 0 is the SVCR state being changed (SM/ZA/SMZA).
+  if (MI1->getOperand(0).getImm() != MI2->getOperand(0).getImm())
+    return false;
+
+  // One must be 'start', the other must be 'stop'
+  // (operand 1 encodes the start/stop direction).
+  if (MI1->getOperand(1).getImm() == MI2->getOperand(1).getImm())
+    return false;
+
+  // Both must be conditional, or both unconditional.
+  bool IsConditional = isConditionalStartStop(MI2);
+  if (isConditionalStartStop(MI1) != IsConditional)
+    return false;
+
+  // Unconditional pairs need no further checks.
+  if (!IsConditional)
+    return true;
+
+  // Check to make sure the conditional start/stop pairs are identical.
+  // Operand 2 is the condition immediate of the conditional form.
+  if (MI1->getOperand(2).getImm() != MI2->getOperand(2).getImm())
+    return false;
+
+  // Ensure reg masks are identical.
+  // NOTE(review): this compares the regmask *pointers*, not their contents —
+  // presumably identical masks share storage; confirm this holds for how
+  // these pseudos are created.
+  if (MI1->getOperand(4).getRegMask() != MI2->getOperand(4).getRegMask())
+    return false;
+
+  // This optimisation is unlikely to happen in practice for conditional
+  // smstart/smstop pairs as the virtual registers for pstate.sm will always
+  // be different.
+  // TODO: For this optimisation to apply to conditional smstart/smstop,
+  // this pass will need to do more work to remove redundant calls to
+  // __arm_sme_state.
+
+  // Only consider conditional start/stop pairs which read the same register
+  // holding the original value of pstate.sm, as some conditional start/stops
+  // require the state on entry to the function.
+  if (MI1->getOperand(3).isReg() && MI2->getOperand(3).isReg()) {
+    Register Reg1 = MI1->getOperand(3).getReg();
+    Register Reg2 = MI2->getOperand(3).getReg();
+    // Physical registers may be redefined between the two instructions, so
+    // only accept matching virtual registers.
+    if (Reg1.isPhysical() || Reg2.isPhysical() || Reg1 != Reg2)
+      return false;
+  }
+
+  return true;
+}
+
+// Returns true if the smstart/smstop instruction \p MI changes streaming
+// mode (pstate.sm), i.e. it targets SM either alone or together with ZA.
+// A pure ZA start/stop does not change streaming mode.
+static bool ChangesStreamingMode(const MachineInstr *MI) {
+  assert((MI->getOpcode() == AArch64::MSRpstatesvcrImm1 ||
+          MI->getOpcode() == AArch64::MSRpstatePseudo) &&
+         "Expected MI to be a smstart/smstop instruction");
+  // Operand 0 holds the SVCR state selector.
+  return MI->getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
+         MI->getOperand(0).getImm() == AArch64SVCR::SVCRSMZA;
+}
+
+bool SMEPeepholeOpt::optimizeStartStopPairs(MachineBasicBlock &MBB,
+                                            bool &HasRemainingSMChange) const {
+  SmallVector<MachineInstr *, 4> ToBeRemoved;
+
+  bool Changed = false;
+  MachineInstr *Prev = nullptr;
+  HasRemainingSMChange = false;
+
+  auto Reset = [&]() {
+    if (Prev && ChangesStreamingMode(Prev))
+      HasRemainingSMChange = true;
+    Prev = nullptr;
+    ToBeRemoved.clear();
+  };
+
+  // Walk through instructions in the block trying to find pairs of smstart
+  // and smstop nodes that cancel each other out. We only permit a limited
+  // set of instructions to appear between them, otherwise we reset our
+  // tracking.
+  for (MachineInstr &MI : make_early_inc_range(MBB)) {
+    switch (MI.getOpcode()) {
+    default:
+      Reset();
+      break;
+    case AArch64::COPY: {
+      // Permit copies of 32 and 64-bit registers.
+      if (!MI.getOperand(1).isReg()) {
----------------
davemgreen wrote:

Should this check the first operand too, just in case it is a cross-register-bank gpr->fpr copy?

https://github.com/llvm/llvm-project/pull/104612


More information about the llvm-commits mailing list