[llvm] [Hexagon] Add Hexagon Load Widening Pass (PR #116330)

Brian Cain via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 15 10:23:58 PST 2024


================
@@ -0,0 +1,915 @@
+//===---HexagonLoadStoreWidening.cpp---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// HexagonStoreWidening:
+// Replace sequences of "narrow" stores to adjacent memory locations with
+// fewer "wide" stores that have the same effect.
+// For example, replace:
+//   S4_storeirb_io  %100, 0, 0   ; store-immediate-byte
+//   S4_storeirb_io  %100, 1, 0   ; store-immediate-byte
+// with
+//   S4_storeirh_io  %100, 0, 0   ; store-immediate-halfword
+// The above is the general idea.  The actual cases handled by the code
+// may be a bit more complex.
+// The purpose of this pass is to reduce the number of outstanding stores,
+// or as one could say, "reduce store queue pressure".  Also, wide stores
+// mean fewer stores, and since there are only two memory instructions allowed
+// per packet, it also means fewer packets, and ultimately fewer cycles.
+//
+// HexagonLoadWidening does the same thing as HexagonStoreWidening but
+// for Loads. Here, we try to replace 4-byte Loads with register-pair loads.
+// For example:
+// Replace
+//   %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8)
+//   %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2)
+// with
+//   %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1)
+//   %2:intregs = COPY %4.isub_lo:doubleregs
+//   %3:intregs = COPY %4.isub_hi:doubleregs
+//
+// LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N
+// insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts
+
+//===---------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-load-store-widening"
+
+// Hidden command-line knob (default 1000) bounding the basic-block size that
+// the load/store widening pass is willing to analyze.
+static cl::opt<unsigned> MaxMBBSizeForLoadStoreWidening(
+    "max-bb-size-for-load-store-widening",
+    cl::desc("Limit block size to analyze in load/store widening pass"),
+    cl::init(1000), cl::Hidden);
+
+namespace llvm {
+
+// Registration hooks; each is called from the constructor of the matching
+// pass class in this file.
+void initializeHexagonLoadWideningPass(PassRegistry &);
+void initializeHexagonStoreWideningPass(PassRegistry &);
+
+// Creation entry points for the two widening passes.
+FunctionPass *createHexagonLoadWidening();
+FunctionPass *createHexagonStoreWidening();
+
+} // end namespace llvm
+
+namespace {
+
+/// Shared implementation behind the store-widening and load-widening passes.
+/// The pass wrappers in this file construct one instance per machine function
+/// and call run(); the StoreMode constructor argument selects whether the
+/// instance operates on stores or on loads.
+struct HexagonLoadStoreWidening {
+  /// The kind of memory instruction an instance widens.
+  enum WideningMode { Store, Load };
+  const HexagonInstrInfo *TII;
+  const HexagonRegisterInfo *TRI;
+  MachineRegisterInfo *MRI;
+  AliasAnalysis *AA;
+  MachineFunction *MF;
+
+public:
+  /// \param StoreMode true selects WideningMode::Store, false selects
+  ///        WideningMode::Load.
+  HexagonLoadStoreWidening(const HexagonInstrInfo *TII,
+                           const HexagonRegisterInfo *TRI,
+                           MachineRegisterInfo *MRI, AliasAnalysis *AA,
+                           MachineFunction *MF, bool StoreMode)
+      : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF),
+        Mode(StoreMode ? WideningMode::Store : WideningMode::Load),
+        HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
+
+  bool run();
+
+private:
+  // Held as the enum itself (not as bool) so that it is initialized from and
+  // compared against WideningMode enumerators without a type change.
+  const WideningMode Mode;
+  // NOTE(review): presumably the widest access, in bytes, that the pass will
+  // form (the header comment describes 64-bit register-pair loads) - confirm.
+  const unsigned MaxWideSize = 8;
+  // Instruction info taken from MF's subtarget in the constructor.
+  const HexagonInstrInfo *HII = nullptr;
+
+  using InstrSet = SmallPtrSet<MachineInstr *, 16>;
+  using InstrGroup = SmallVector<MachineInstr *, 8>;
+  using InstrGroupList = SmallVector<InstrGroup, 8>;
+
+  InstrSet ProcessedInsts;
+
+  /// Register held in the base-address operand of a handled instruction.
+  unsigned getBaseAddressRegister(const MachineInstr *MI);
+  /// Offset of a handled instruction; 0 for post-increment forms.
+  int64_t getOffset(const MachineInstr *MI);
+  /// Immediate found at the offset position reported by
+  /// getBaseAndOffsetPosition().
+  int64_t getPostIncrementValue(const MachineInstr *MI);
+  /// True if MI's opcode and operands are accepted for the current mode.
+  bool handledInstType(const MachineInstr *MI);
+
+  void createGroup(MachineInstr *BaseInst, InstrGroup &Group);
+  void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups);
+  bool processBasicBlock(MachineBasicBlock &MBB);
+  bool processGroup(InstrGroup &Group);
+  bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End,
+                   InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
+  bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
+  bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
+  bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
+  bool replaceInsts(InstrGroup &OG, InstrGroup &NG);
+  bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2);
+  /// True if A and B have no register dependence and no aliasing memory
+  /// access involving a store.
+  bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B);
+};
+
+/// MachineFunctionPass wrapper that runs HexagonLoadStoreWidening in store
+/// mode over each machine function.
+struct HexagonStoreWidening : public MachineFunctionPass {
+  static char ID;
+
+  HexagonStoreWidening() : MachineFunctionPass(ID) {
+    initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "Hexagon Store Widening"; }
+
+  // Alias-analysis results are both required and preserved.
+  void getAnalysisUsage(AnalysisUsage &Usage) const override {
+    Usage.addRequired<AAResultsWrapperPass>();
+    Usage.addPreserved<AAResultsWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(Usage);
+  }
+
+  // Returns whatever HexagonLoadStoreWidening::run() reports, or false when
+  // the function is skipped.
+  bool runOnMachineFunction(MachineFunction &MFn) override {
+    if (skipFunction(MFn.getFunction()))
+      return false;
+
+    const HexagonSubtarget &Subtarget = MFn.getSubtarget<HexagonSubtarget>();
+    AliasAnalysis &AAResults =
+        getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+    HexagonLoadStoreWidening Widener(Subtarget.getInstrInfo(),
+                                     Subtarget.getRegisterInfo(),
+                                     &MFn.getRegInfo(), &AAResults, &MFn,
+                                     /*StoreMode=*/true);
+    return Widener.run();
+  }
+};
+
+/// MachineFunctionPass wrapper that runs HexagonLoadStoreWidening in load
+/// mode over each machine function.
+struct HexagonLoadWidening : public MachineFunctionPass {
+  static char ID;
+
+  HexagonLoadWidening() : MachineFunctionPass(ID) {
+    initializeHexagonLoadWideningPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "Hexagon Load Widening"; }
+
+  // Alias-analysis results are both required and preserved.
+  void getAnalysisUsage(AnalysisUsage &Usage) const override {
+    Usage.addRequired<AAResultsWrapperPass>();
+    Usage.addPreserved<AAResultsWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(Usage);
+  }
+
+  // Returns whatever HexagonLoadStoreWidening::run() reports, or false when
+  // the function is skipped.
+  bool runOnMachineFunction(MachineFunction &MFn) override {
+    if (skipFunction(MFn.getFunction()))
+      return false;
+
+    const HexagonSubtarget &Subtarget = MFn.getSubtarget<HexagonSubtarget>();
+    AliasAnalysis &AAResults =
+        getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+    HexagonLoadStoreWidening Widener(Subtarget.getInstrInfo(),
+                                     Subtarget.getRegisterInfo(),
+                                     &MFn.getRegInfo(), &AAResults, &MFn,
+                                     /*StoreMode=*/false);
+    return Widener.run();
+  }
+};
+
+char HexagonStoreWidening::ID = 0; // Passed to MachineFunctionPass(ID) in the constructor.
+char HexagonLoadWidening::ID = 0; // Passed to MachineFunctionPass(ID) in the constructor.
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
+                      "Hexagon Store Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) // Mirrors addRequired<> in getAnalysisUsage().
+INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
+                    "Hexagon Store Widening", false, false)
+
+INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
+                      "Hexagon Load Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) // Mirrors addRequired<> in getAnalysisUsage().
+INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
+                    "Hexagon Load Widening", false, false)
+
+// Returns the first memory operand attached to MI. MI must carry at least
+// one memory operand.
+static const MachineMemOperand &getMemTarget(const MachineInstr *MI) {
+  assert(!MI->memoperands_empty() && "Expecting memory operands");
+  const MachineMemOperand *FirstMMO = *MI->memoperands_begin();
+  return *FirstMMO;
+}
+
+// Returns the register found in MI's base-address operand. MI must be an
+// instruction accepted by handledInstType().
+unsigned
+HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) {
+  assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
+
+  // Only the base position is needed here; the offset position is ignored.
+  unsigned BaseIdx, OffsetIdx;
+  HII->getBaseAndOffsetPosition(*MI, BaseIdx, OffsetIdx);
+
+  const MachineOperand &MO = MI->getOperand(BaseIdx);
+  assert(MO.isReg() && "Expecting register operand");
+  return MO.getReg();
+}
+
+// Returns the address offset of MI: 0 for post-increment forms, otherwise the
+// value of the immediate or the offset of the global-address operand at the
+// offset position. MI must be accepted by handledInstType().
+int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) {
+  assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
+
+  // Hexagon post-increment forms have no offset operand; their offset is
+  // always 0.
+  if (HII->isPostIncrement(*MI))
+    return 0;
+
+  unsigned BaseIdx, OffsetIdx;
+  HII->getBaseAndOffsetPosition(*MI, BaseIdx, OffsetIdx);
+  const MachineOperand &MO = MI->getOperand(OffsetIdx);
+
+  const auto Kind = MO.getType();
+  if (Kind == MachineOperand::MO_Immediate)
+    return MO.getImm();
+  if (Kind == MachineOperand::MO_GlobalAddress)
+    return MO.getOffset();
+
+  llvm_unreachable("Expecting an immediate or global operand");
+}
+
+// Returns the immediate stored at the offset position that
+// getBaseAndOffsetPosition() reports for MI.
+inline int64_t
+HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) {
+  unsigned BaseIdx, IncIdx;
+  HII->getBaseAndOffsetPosition(*MI, BaseIdx, IncIdx);
+  return MI->getOperand(IncIdx).getImm();
+}
+
+// Opcode filter for the current mode: only the loads/stores accepted here are
+// candidates for widening; every other instruction yields false.
+inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) {
+  const unsigned Opcode = MI->getOpcode();
+
+  if (Mode == WideningMode::Store) {
+    // Post-increment word store: operand 1 must be a register.
+    if (Opcode == Hexagon::S2_storeri_pi)
+      return MI->getOperand(1).isReg();
+
+    // Base+offset stores: the base address (operand 0) must be a register.
+    // (Implement FI later.)
+    const bool IsOffsetStore = Opcode == Hexagon::S4_storeirb_io ||
+                               Opcode == Hexagon::S4_storeirh_io ||
+                               Opcode == Hexagon::S4_storeiri_io ||
+                               Opcode == Hexagon::S2_storeri_io;
+    return IsOffsetStore && MI->getOperand(0).isReg();
+  }
+
+  // Load mode. Only 32-bit loads are widened: widening 8- and 16-bit loads
+  // would need AND/SHIFT ops to select the right bits, costing 2x
+  // instructions to replace x loads.
+  if (Opcode == Hexagon::L2_loadri_io) {
+    // Needs a memory operand, a register base and an immediate offset.
+    return !MI->memoperands_empty() && MI->getOperand(1).isReg() &&
+           MI->getOperand(2).isImm();
+  }
+  if (Opcode == Hexagon::L2_loadri_pi)
+    return !MI->memoperands_empty() && MI->getOperand(2).isReg();
+
+  return false;
+}
+
+// Collects the registers of MI's register operands: those that are defs go
+// into RegDefs, those that read the register go into RegUses. An operand
+// satisfying both tests is recorded in both sets.
+static void addDefsUsesToList(const MachineInstr *MI,
+                              DenseSet<Register> &RegDefs,
+                              DenseSet<Register> &RegUses) {
+  for (const MachineOperand &MO : MI->operands()) {
+    if (MO.isReg()) {
+      const Register R = MO.getReg();
+      if (MO.isDef())
+        RegDefs.insert(R);
+      if (MO.readsReg())
+        RegUses.insert(R);
+    }
+  }
+}
+
+bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A,
+                                                   const MachineInstr *B) {
+  DenseSet<Register> ARegDefs;
+  DenseSet<Register> ARegUses;
+  addDefsUsesToList(A, ARegDefs, ARegUses);
+  if (A->mayLoadOrStore() && B->mayLoadOrStore() &&
+      (A->mayStore() || B->mayStore()) && A->mayAlias(AA, *B, true))
+    return false;
+  for (const auto &BOp : B->operands()) {
+    if (!BOp.isReg())
+      continue;
+    if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg()))
+      return false;
+    if (BOp.isDef() && ARegUses.contains(BOp.getReg()))
+      return false;
+  }
+  return true;
----------------
androm3da wrote:

Are there any ordering dependencies here that should be checked with `LLVM_ENABLE_REVERSE_ITERATION` enabled?

https://github.com/llvm/llvm-project/pull/116330


More information about the llvm-commits mailing list