[llvm] [AArch64] Add SME peephole optimizer pass (PR #104612)

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 19 10:18:50 PDT 2024


================
@@ -0,0 +1,216 @@
+//===- SMEPeepholeOpt.cpp - SME peephole optimization pass-----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This pass tries to remove back-to-back (smstart, smstop) and
+// (smstop, smstart) sequences. The pass is conservative when it cannot
+// determine that it is safe to remove these sequences.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64SMEAttributes.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-sme-peephole-opt"
+
+namespace {
+
+/// MachineFunctionPass that hosts the smstart/smstop peephole described in the
+/// file header. The per-block work is done by optimizeStartStopPairs.
+struct SMEPeepholeOpt : public MachineFunctionPass {
+  static char ID;
+
+  // Registers the pass with the global PassRegistry when an instance is built.
+  SMEPeepholeOpt() : MachineFunctionPass(ID) {
+    initializeSMEPeepholeOptPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "SME Peephole Optimization pass";
+  }
+
+  // Declares that the CFG is preserved, then defers to the base class for the
+  // remaining analysis requirements.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  // Scans a single block for cancelling smstart/smstop pairs.
+  // HasRemainingSMChange is an out-parameter: the definition clears it on
+  // entry and sets it when a tracked instruction that changes streaming mode
+  // is dropped from tracking without being removed.
+  // NOTE(review): presumably returns whether the block was modified; confirm
+  // against the full definition.
+  bool optimizeStartStopPairs(MachineBasicBlock &MBB,
+                              bool &HasRemainingSMChange) const;
+};
+
+char SMEPeepholeOpt::ID = 0;
+
+} // end anonymous namespace
+
+/// Returns true if \p MI uses the MSRpstatePseudo opcode, which this file
+/// treats as the conditional form of smstart/smstop.
+static bool isConditionalStartStop(const MachineInstr *MI) {
+  return MI->getOpcode() == AArch64::MSRpstatePseudo;
+}
+
+/// Returns true if \p MI1 and \p MI2 form a start/stop pair that cancels out.
+///
+/// Both instructions must have the same operand 0 and opposite operand 1
+/// values, and must be either both conditional or both unconditional.
+/// Conditional pairs must additionally agree on operand 2, on the register
+/// mask in operand 4 and, when operand 3 is a register on both, on that
+/// (virtual) register.
+static bool isMatchingStartStopPair(const MachineInstr *MI1,
+                                    const MachineInstr *MI2) {
+  // We only consider the same type of streaming mode change here, i.e.
+  // start/stop SM, or start/stop ZA pairs.
+  if (MI1->getOperand(0).getImm() != MI2->getOperand(0).getImm())
+    return false;
+
+  // One must be 'start', the other must be 'stop'
+  if (MI1->getOperand(1).getImm() == MI2->getOperand(1).getImm())
+    return false;
+
+  // A conditional instruction can only pair with another conditional one, and
+  // an unconditional instruction only with another unconditional one.
+  bool IsConditional = isConditionalStartStop(MI2);
+  if (isConditionalStartStop(MI1) != IsConditional)
+    return false;
+
+  // Unconditional pairs need no further checks.
+  if (!IsConditional)
+    return true;
+
+  // Check to make sure the conditional start/stop pairs are identical.
+  if (MI1->getOperand(2).getImm() != MI2->getOperand(2).getImm())
+    return false;
+
+  // Ensure reg masks are identical.
+  if (MI1->getOperand(4).getRegMask() != MI2->getOperand(4).getRegMask())
+    return false;
+
+  // This optimisation is unlikely to happen in practice for conditional
+  // smstart/smstop pairs as the virtual registers for pstate.sm will always
+  // be different.
+  // TODO: For this optimisation to apply to conditional smstart/smstop,
+  // this pass will need to do more work to remove redundant calls to
+  // __arm_sme_state.
+
+  // Only consider conditional start/stop pairs which read the same register
+  // holding the original value of pstate.sm, as some conditional start/stops
+  // require the state on entry to the function.
+  // Note that this check is skipped entirely unless operand 3 is a register
+  // on both instructions; physical registers are always rejected.
+  if (MI1->getOperand(3).isReg() && MI2->getOperand(3).isReg()) {
+    Register Reg1 = MI1->getOperand(3).getReg();
+    Register Reg2 = MI2->getOperand(3).getReg();
+    if (Reg1.isPhysical() || Reg2.isPhysical() || Reg1 != Reg2)
+      return false;
+  }
+
+  return true;
+}
+
+/// Returns true if operand 0 of \p MI is AArch64SVCR::SVCRSM or
+/// AArch64SVCR::SVCRSMZA. Asserts that \p MI is one of the two smstart/smstop
+/// opcodes handled by this file.
+static bool ChangesStreamingMode(const MachineInstr *MI) {
+  assert((MI->getOpcode() == AArch64::MSRpstatesvcrImm1 ||
+          MI->getOpcode() == AArch64::MSRpstatePseudo) &&
+         "Expected MI to be a smstart/smstop instruction");
+  return MI->getOperand(0).getImm() == AArch64SVCR::SVCRSM ||
+         MI->getOperand(0).getImm() == AArch64SVCR::SVCRSMZA;
+}
+
+bool SMEPeepholeOpt::optimizeStartStopPairs(MachineBasicBlock &MBB,
+                                            bool &HasRemainingSMChange) const {
+  SmallVector<MachineInstr *, 4> ToBeRemoved;
+
+  bool Changed = false;
+  MachineInstr *Prev = nullptr;
+  HasRemainingSMChange = false;
+
+  auto Reset = [&]() {
+    if (Prev && ChangesStreamingMode(Prev))
+      HasRemainingSMChange = true;
+    Prev = nullptr;
+    ToBeRemoved.clear();
+  };
+
+  // Walk through instructions in the block trying to find pairs of smstart
+  // and smstop nodes that cancel each other out. We only permit a limited
+  // set of instructions to appear between them, otherwise we reset our
+  // tracking.
+  for (MachineInstr &MI : make_early_inc_range(MBB)) {
+    switch (MI.getOpcode()) {
+    default:
+      Reset();
+      break;
+    case AArch64::COPY: {
+      // Permit copies of 32 and 64-bit registers.
+      if (!MI.getOperand(1).isReg()) {
+        Reset();
+        break;
+      }
+      Register Reg = MI.getOperand(1).getReg();
+      if (!AArch64::GPR32RegClass.contains(Reg) &&
+          !AArch64::GPR64RegClass.contains(Reg))
+        Reset();
+      break;
+    }
+    case AArch64::ADJCALLSTACKDOWN:
+    case AArch64::ADJCALLSTACKUP:
+    case AArch64::ANDXri:
+    case AArch64::ADDXri:
+      // We permit these as they don't generate SVE/NEON instructions.
+      break;
+    case AArch64::VGRestorePseudo:
+    case AArch64::VGSavePseudo:
+      // When the smstart/smstop are removed, we should also remove
+      // the pseudos that save/restore the VG value for CFI info.
+      ToBeRemoved.push_back(&MI);
+      break;
+    case AArch64::MSRpstatesvcrImm1:
+    case AArch64::MSRpstatePseudo: {
+      if (!Prev)
+        Prev = &MI;
+      else if (isMatchingStartStopPair(Prev, &MI)) {
+        // If they match, we can remove them, and possibly any instructions
+        // that we marked for deletion in between.
+        Prev->eraseFromParent();
+        MI.eraseFromParent();
+        for (MachineInstr *TBR : ToBeRemoved)
+          TBR->eraseFromParent();
+        ToBeRemoved.clear();
+        Prev = nullptr;
+        Changed = true;
+      } else {
+        Reset();
----------------
davemgreen wrote:

I see. My point was maybe quite contrived. If we have a block like:
```
bb:
  smstart

```
It doesn't have anything else in it (maybe a copy or another "allowed" instruction), and no terminator. There would be another block with the corresponding smstop, and another block with a smstart/smstop pair that gets removed (to set `Changed=true`). In the block above it would start out as `HasRemainingSMChange = false;`, look at the smstart to set `if (!Prev) Prev = &MI;`, then return because there were no changes. Nothing sets `HasRemainingSMChange`, so the runOnMachineFunction function sets `AFI->setHasStreamingModeChanges(FunctionHasRemainingSMChange);` (= false). It has to be a bit contrived to set `Changed` but not `FunctionHasRemainingSMChange`.

https://github.com/llvm/llvm-project/pull/104612


More information about the llvm-commits mailing list