[llvm] [AArch64][SME] Implement the SME ABI (ZA state management) in Machine IR (PR #149062)

Benjamin Maxwell via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 27 03:16:59 PDT 2025


================
@@ -0,0 +1,696 @@
+//===- MachineSMEABIPass.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the SME ABI requirements for ZA state. This includes
+// implementing the lazy ZA state save schemes around calls.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass works by collecting instructions that require ZA to be in a
+// specific state (e.g., "ACTIVE" or "SAVED") and inserting the necessary state
+// transitions to ensure ZA is in the required state before instructions. State
+// transitions represent actions such as setting up or restoring a lazy save.
+// Certain points within a function may also have predefined states independent
+// of any instructions, for example, a "shared_za" function is always entered
+// and exited in the "ACTIVE" state.
+//
+// To handle ZA state across control flow, we make use of edge bundling. This
+// assigns each block an "incoming" and "outgoing" edge bundle (representing
+// incoming and outgoing edges). Initially, these are unique to each block;
+// then, in the process of forming bundles, the outgoing block of a block is
+// joined with the incoming bundle of all successors. The result is that each
+// bundle can be assigned a single ZA state, which ensures the state required by
+// all a blocks' successors is the same, and that each basic block will always
+// be entered with the same ZA state. This eliminates the need for splitting
+// edges to insert state transitions or "phi" nodes for ZA states.
+//
+// See below for a simple example of edge bundling.
+//
+// The following shows a conditionally executed basic block (BB1):
+//
+// if (cond)
+//   BB1
+// BB2
+//
+// Initial Bundles         Joined Bundles
+//
+//   ┌──0──┐                ┌──0──┐
+//   │ BB0 │                │ BB0 │
+//   └──1──┘                └──1──┘
+//      ├───────┐              ├───────┐
+//      ▼       │              ▼       │
+//   ┌──2──┐    │   ─────►  ┌──1──┐    │
+//   │ BB1 │    ▼           │ BB1 │    ▼
+//   └──3──┘ ┌──4──┐        └──1──┘ ┌──1──┐
+//      └───►4 BB2 │           └───►1 BB2 │
+//           └──5──┘                └──2──┘
+//
+// On the left are the initial per-block bundles, and on the right are the
+// joined bundles (which are the result of the EdgeBundles analysis).
+
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-machine-sme-abi"
+
+namespace {
+
+enum ZAState {
+  // Any/unknown state (not valid)
+  ANY = 0,
+
+  // ZA is in use and active (i.e. within the accumulator)
+  ACTIVE,
+
+  // A ZA save has been set up or committed (i.e. ZA is dormant or off)
+  LOCAL_SAVED,
+
+  // ZA is off or a lazy save has been set up by the caller
+  CALLER_DORMANT,
+
+  // ZA is off
+  OFF,
+
+  // The number of ZA states (not a valid state)
+  NUM_ZA_STATE
+};
+
+/// A bitmask enum to record live physical registers that the "emit*" routines
+/// may need to preserve. Note: This only tracks registers we may clobber.
+enum LiveRegs : uint8_t {
+  None = 0,
+  NZCV = 1 << 0,
+  W0 = 1 << 1,
+  W0_HI = 1 << 2,
+  X0 = W0 | W0_HI,
+  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ W0_HI)
+};
+
+/// Holds the virtual registers live physical registers have been saved to.
+struct PhysRegSave {
+  LiveRegs PhysLiveRegs;
+  Register StatusFlags = AArch64::NoRegister;
+  Register X0Save = AArch64::NoRegister;
+};
+
+static bool isLegalEdgeBundleZAState(ZAState State) {
+  switch (State) {
+  case ZAState::ACTIVE:
+  case ZAState::LOCAL_SAVED:
+    return true;
+  default:
+    return false;
+  }
+}
+struct TPIDR2State {
+  int FrameIndex = -1;
+};
+
+StringRef getZAStateString(ZAState State) {
+#define MAKE_CASE(V)                                                           \
+  case V:                                                                      \
+    return #V;
+  switch (State) {
+    MAKE_CASE(ZAState::ANY)
+    MAKE_CASE(ZAState::ACTIVE)
+    MAKE_CASE(ZAState::LOCAL_SAVED)
+    MAKE_CASE(ZAState::CALLER_DORMANT)
+    MAKE_CASE(ZAState::OFF)
+  default:
+    llvm_unreachable("Unexpected ZAState");
+  }
+#undef MAKE_CASE
+}
+
+static bool isZAorZT0RegOp(const TargetRegisterInfo &TRI,
+                           const MachineOperand &MO) {
+  if (!MO.isReg() || !MO.getReg().isPhysical())
+    return false;
+  return any_of(TRI.subregs_inclusive(MO.getReg()), [](const MCPhysReg &SR) {
+    return AArch64::MPR128RegClass.contains(SR) ||
+           AArch64::ZTRRegClass.contains(SR);
+  });
+}
+
+/// Returns the required ZA state needed before \p MI and an iterator pointing
+/// to where any code required to change the ZA state should be inserted.
+static std::pair<ZAState, MachineBasicBlock::iterator>
+getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI,
+                     bool ZAOffAtReturn) {
+  MachineBasicBlock::iterator InsertPt(MI);
+
+  if (MI.getOpcode() == AArch64::InOutZAUsePseudo)
+    return {ZAState::ACTIVE, std::prev(InsertPt)};
+
+  if (MI.getOpcode() == AArch64::RequiresZASavePseudo)
+    return {ZAState::LOCAL_SAVED, std::prev(InsertPt)};
+
+  if (MI.isReturn())
+    return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt};
+
+  for (auto &MO : MI.operands()) {
+    if (isZAorZT0RegOp(TRI, MO))
+      return {ZAState::ACTIVE, InsertPt};
+  }
+
+  return {ZAState::ANY, InsertPt};
+}
+
+struct MachineSMEABI : public MachineFunctionPass {
+  inline static char ID = 0;
+
+  MachineSMEABI() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override { return "Machine SME ABI pass"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<EdgeBundlesWrapperLegacy>();
+    AU.addPreservedID(MachineLoopInfoID);
+    AU.addPreservedID(MachineDominatorsID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  /// Collects the needed ZA state (and live registers) before each instruction
+  /// within the machine function.
+  void collectNeededZAStates(SMEAttrs);
+
+  /// Assigns each edge bundle a ZA state based on the needed states of blocks
+  /// that have incoming or outgoing edges in that bundle.
+  void assignBundleZAStates();
+
+  /// Inserts code to handle changes between ZA states within the function.
+  /// E.g., ACTIVE -> LOCAL_SAVED will insert code required to save ZA.
+  void insertStateChanges();
+
+  // Emission routines for private and shared ZA functions (using lazy saves).
+  void emitNewZAPrologue(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI);
+  void emitRestoreLazySave(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI,
+                           LiveRegs PhysLiveRegs);
+  void emitSetupLazySave(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI);
+  void emitAllocateLazySaveBuffer(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator MBBI);
+  void emitZAOff(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                 bool ClearTPIDR2);
+
+  void emitStateChange(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                       ZAState From, ZAState To, LiveRegs PhysLiveRegs);
+
+  /// Save live physical registers to virtual registers.
+  PhysRegSave createPhysRegSave(LiveRegs PhysLiveRegs, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI, DebugLoc DL);
+  /// Restore physical registers from a save of their previous values.
+  void restorePhyRegSave(PhysRegSave const &RegSave, MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI, DebugLoc DL);
+
+  /// Get or create a TPIDR2 block in this function.
+  TPIDR2State getTPIDR2Block();
+
+private:
+  /// Contains the needed ZA state (and live registers) at an instruction.
+  struct InstInfo {
+    ZAState NeededState{ZAState::ANY};
+    MachineBasicBlock::iterator InsertPt;
+    LiveRegs PhysLiveRegs = LiveRegs::None;
+  };
+
+  /// Contains the needed ZA state for each instruction in a block.
+  /// Instructions that do not require a ZA state are not recorded.
+  struct BlockInfo {
+    ZAState FixedEntryState{ZAState::ANY};
+    SmallVector<InstInfo> Insts;
+    LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
+  };
+
+  // All pass state that must be cleared between functions.
+  struct PassState {
+    SmallVector<BlockInfo> Blocks;
+    SmallVector<ZAState> BundleStates;
+    std::optional<TPIDR2State> TPIDR2Block;
+  } State;
+
+  MachineFunction *MF = nullptr;
+  EdgeBundles *EdgeBundles = nullptr;
+  const AArch64Subtarget *Subtarget = nullptr;
+  const AArch64RegisterInfo *TRI = nullptr;
+  const TargetInstrInfo *TII = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+};
+
+void MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
+  assert((SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) &&
+         "Expected function to have ZA/ZT0 state!");
+
+  State.Blocks.resize(MF->getNumBlockIDs());
+  for (MachineBasicBlock &MBB : *MF) {
+    BlockInfo &Block = State.Blocks[MBB.getNumber()];
+    if (&MBB == &MF->front()) {
----------------
MacDue wrote:

Done in #155588 (along with your previous suggestion). 

https://github.com/llvm/llvm-project/pull/149062


More information about the llvm-commits mailing list