[llvm] [Hexagon] Add Hexagon Load Widening Pass (PR #116330)
Brian Cain via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 15 10:23:58 PST 2024
================
@@ -0,0 +1,915 @@
+//===---HexagonLoadStoreWidening.cpp---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// HexagonStoreWidening:
+// Replace sequences of "narrow" stores to adjacent memory locations with
+// fewer "wide" stores that have the same effect.
+// For example, replace:
+// S4_storeirb_io %100, 0, 0 ; store-immediate-byte
+// S4_storeirb_io %100, 1, 0 ; store-immediate-byte
+// with
+// S4_storeirh_io %100, 0, 0 ; store-immediate-halfword
+// The above is the general idea. The actual cases handled by the code
+// may be a bit more complex.
+// The purpose of this pass is to reduce the number of outstanding stores,
+// or as one could say, "reduce store queue pressure". Also, wide stores
+// mean fewer stores, and since there are only two memory instructions allowed
+// per packet, it also means fewer packets, and ultimately fewer cycles.
+//
+// HexagonLoadWidening does the same thing as HexagonStoreWidening but
+// for Loads. Here, we try to replace 4-byte Loads with register-pair loads.
+// For example:
+// Replace
+// %2:intregs = L2_loadri_io %1:intregs, 0 :: (load (s32) from %ptr1, align 8)
+// %3:intregs = L2_loadri_io %1:intregs, 4 :: (load (s32) from %ptr2)
+// with
+// %4:doubleregs = L2_loadrd_io %1:intregs, 0 :: (load (s64) from %ptr1)
+// %2:intregs = COPY %4.isub_lo:doubleregs
+// %3:intregs = COPY %4.isub_hi:doubleregs
+//
+// LoadWidening for 8 and 16-bit loads is not useful as we end up generating 2N
+// insts to replace N loads: 1 widened load, N bitwise and, N - 1 shifts
+
+//===---------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-load-store-widening"
+
+// Hidden command-line option (default: 1000). Per its description it limits
+// the size of the basic blocks analyzed by the load/store widening pass; the
+// code that consults it is not part of this section.
+static cl::opt<unsigned> MaxMBBSizeForLoadStoreWidening(
+ "max-bb-size-for-load-store-widening", cl::Hidden, cl::init(1000),
+ cl::desc("Limit block size to analyze in load/store widening pass"));
+
+// Forward declarations, in the llvm namespace, of the factory functions and
+// the pass-registry initializers for the store-widening and load-widening
+// passes.
+namespace llvm {
+
+FunctionPass *createHexagonStoreWidening();
+FunctionPass *createHexagonLoadWidening();
+void initializeHexagonStoreWideningPass(PassRegistry &);
+void initializeHexagonLoadWideningPass(PassRegistry &);
+
+} // end namespace llvm
+
+namespace {
+
+/// Implementation shared by the store-widening and load-widening passes.
+///
+/// The pass wrappers construct one instance per machine function, selecting
+/// store or load behaviour through the \c StoreMode constructor argument, and
+/// then call run().
+struct HexagonLoadStoreWidening {
+  /// The kind of memory instruction an instance operates on.
+  enum WideningMode { Store, Load };
+
+  const HexagonInstrInfo *TII;
+  const HexagonRegisterInfo *TRI;
+  MachineRegisterInfo *MRI;
+  AliasAnalysis *AA;
+  MachineFunction *MF;
+
+public:
+  /// \param StoreMode true selects WideningMode::Store, false selects
+  ///        WideningMode::Load.
+  HexagonLoadStoreWidening(const HexagonInstrInfo *TII,
+                           const HexagonRegisterInfo *TRI,
+                           MachineRegisterInfo *MRI, AliasAnalysis *AA,
+                           MachineFunction *MF, bool StoreMode)
+      : TII(TII), TRI(TRI), MRI(MRI), AA(AA), MF(MF),
+        Mode(StoreMode ? WideningMode::Store : WideningMode::Load),
+        HII(MF->getSubtarget<HexagonSubtarget>().getInstrInfo()) {}
+
+  /// Entry point invoked by the pass wrappers; its result is returned from
+  /// runOnMachineFunction.
+  bool run();
+
+private:
+  // Stored as the enum it is initialised from and compared against. Keeping
+  // it as 'bool' only worked because WideningMode::Store happens to be 0.
+  const WideningMode Mode;
+  // NOTE(review): presumably the widest access, in bytes, that the pass will
+  // create (a register pair) -- confirm against the users of this constant.
+  const unsigned MaxWideSize = 8;
+  // Instruction info obtained from MF's Hexagon subtarget in the constructor.
+  const HexagonInstrInfo *HII = nullptr;
+
+  using InstrSet = SmallPtrSet<MachineInstr *, 16>;
+  using InstrGroup = SmallVector<MachineInstr *, 8>;
+  using InstrGroupList = SmallVector<InstrGroup, 8>;
+
+  InstrSet ProcessedInsts;
+
+  /// Returns the register operand that holds the base address of \p MI.
+  unsigned getBaseAddressRegister(const MachineInstr *MI);
+  /// Returns the constant offset of \p MI; 0 for post-increment forms.
+  int64_t getOffset(const MachineInstr *MI);
+  /// Returns the immediate found at the offset position of \p MI.
+  int64_t getPostIncrementValue(const MachineInstr *MI);
+  /// Returns true if \p MI has an opcode and operand form that the current
+  /// mode is prepared to widen.
+  bool handledInstType(const MachineInstr *MI);
+
+  // Grouping, selection and rewriting steps; their definitions are not part
+  // of this section of the file.
+  void createGroup(MachineInstr *BaseInst, InstrGroup &Group);
+  void createGroups(MachineBasicBlock &MBB, InstrGroupList &StoreGroups);
+  bool processBasicBlock(MachineBasicBlock &MBB);
+  bool processGroup(InstrGroup &Group);
+  bool selectInsts(InstrGroup::iterator Begin, InstrGroup::iterator End,
+                   InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
+  bool createWideInsts(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
+  bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
+  bool createWideLoads(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
+  bool replaceInsts(InstrGroup &OG, InstrGroup &NG);
+  bool areAdjacent(const MachineInstr *S1, const MachineInstr *S2);
+  /// Returns false if \p A and \p B may alias with at least one of them
+  /// storing, or if they have a register dependency; true otherwise.
+  bool canSwapInstructions(const MachineInstr *A, const MachineInstr *B);
+};
+
+/// Machine-function pass wrapper that runs the shared widening
+/// implementation in store mode.
+struct HexagonStoreWidening : public MachineFunctionPass {
+  static char ID;
+
+  HexagonStoreWidening() : MachineFunctionPass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeHexagonStoreWideningPass(Registry);
+  }
+
+  StringRef getPassName() const override { return "Hexagon Store Widening"; }
+
+  /// Alias analysis is both required and preserved by this pass.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addPreserved<AAResultsWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  /// Gathers the subtarget and analysis handles and delegates to
+  /// HexagonLoadStoreWidening::run(); returns its result, or false when the
+  /// function is skipped.
+  bool runOnMachineFunction(MachineFunction &MFn) override {
+    if (skipFunction(MFn.getFunction()))
+      return false;
+
+    const HexagonSubtarget &Subtarget = MFn.getSubtarget<HexagonSubtarget>();
+    AliasAnalysis &Aliases = getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+    HexagonLoadStoreWidening Widener(Subtarget.getInstrInfo(),
+                                     Subtarget.getRegisterInfo(),
+                                     &MFn.getRegInfo(), &Aliases, &MFn,
+                                     /*StoreMode=*/true);
+    return Widener.run();
+  }
+};
+
+/// Machine-function pass wrapper that runs the shared widening
+/// implementation in load mode.
+struct HexagonLoadWidening : public MachineFunctionPass {
+  static char ID;
+
+  HexagonLoadWidening() : MachineFunctionPass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeHexagonLoadWideningPass(Registry);
+  }
+
+  StringRef getPassName() const override { return "Hexagon Load Widening"; }
+
+  /// Alias analysis is both required and preserved by this pass.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addPreserved<AAResultsWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  /// Gathers the subtarget and analysis handles and delegates to
+  /// HexagonLoadStoreWidening::run(); returns its result, or false when the
+  /// function is skipped.
+  bool runOnMachineFunction(MachineFunction &MFn) override {
+    if (skipFunction(MFn.getFunction()))
+      return false;
+
+    const HexagonSubtarget &Subtarget = MFn.getSubtarget<HexagonSubtarget>();
+    AliasAnalysis &Aliases = getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+    HexagonLoadStoreWidening Widener(Subtarget.getInstrInfo(),
+                                     Subtarget.getRegisterInfo(),
+                                     &MFn.getRegInfo(), &Aliases, &MFn,
+                                     /*StoreMode=*/false);
+    return Widener.run();
+  }
+};
+
+// Out-of-class definitions of the static ID members that each pass hands to
+// the MachineFunctionPass constructor.
+char HexagonStoreWidening::ID = 0;
+char HexagonLoadWidening::ID = 0;
+
+} // end anonymous namespace
+
+// Pass registration for the store-widening pass: argument string
+// "hexagon-widen-stores", with AAResultsWrapperPass declared as a dependency.
+INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
+ "Hexagon Store Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
+ "Hexagon Store Widening", false, false)
+
+// Pass registration for the load-widening pass: argument string
+// "hexagon-widen-loads", with AAResultsWrapperPass declared as a dependency.
+INITIALIZE_PASS_BEGIN(HexagonLoadWidening, "hexagon-widen-loads",
+ "Hexagon Load Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonLoadWidening, "hexagon-widen-loads",
+ "Hexagon Load Widening", false, false)
+
+/// Returns the first memory operand attached to \p MI. The instruction must
+/// carry at least one memory operand (asserted).
+static const MachineMemOperand &getMemTarget(const MachineInstr *MI) {
+  assert(!MI->memoperands_empty() && "Expecting memory operands");
+  const MachineMemOperand *FirstMMO = *MI->memoperands_begin();
+  return *FirstMMO;
+}
+
+/// Returns the register used as the base address of \p MI.
+///
+/// \p MI must satisfy handledInstType(), and the operand at the base position
+/// reported by the instruction info must be a register (both asserted).
+unsigned
+HexagonLoadStoreWidening::getBaseAddressRegister(const MachineInstr *MI) {
+  assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
+  // Only the base position is needed; the offset position is ignored.
+  unsigned BaseIdx, OffsetIdx;
+  HII->getBaseAndOffsetPosition(*MI, BaseIdx, OffsetIdx);
+  const MachineOperand &BaseOp = MI->getOperand(BaseIdx);
+  assert(BaseOp.isReg() && "Expecting register operand");
+  return BaseOp.getReg();
+}
+
+/// Returns the constant offset of the memory access performed by \p MI.
+///
+/// Post-increment instructions always yield 0, since they have no offset
+/// operand on Hexagon. Otherwise the operand at the offset position is read:
+/// an immediate yields its value, a global address yields its offset, and
+/// any other operand kind is treated as unreachable.
+int64_t HexagonLoadStoreWidening::getOffset(const MachineInstr *MI) {
+  assert(HexagonLoadStoreWidening::handledInstType(MI) && "Unhandled opcode");
+
+  if (!HII->isPostIncrement(*MI)) {
+    unsigned BaseIdx, OffsetIdx;
+    HII->getBaseAndOffsetPosition(*MI, BaseIdx, OffsetIdx);
+
+    const MachineOperand &OffsetOp = MI->getOperand(OffsetIdx);
+    if (OffsetOp.isImm())
+      return OffsetOp.getImm();
+    if (OffsetOp.isGlobal())
+      return OffsetOp.getOffset();
+    llvm_unreachable("Expecting an immediate or global operand");
+  }
+
+  return 0;
+}
+
+/// Returns the immediate stored at the offset position that the instruction
+/// info reports for \p MI; callers use it as the post-increment amount.
+inline int64_t
+HexagonLoadStoreWidening::getPostIncrementValue(const MachineInstr *MI) {
+  unsigned BaseIdx, IncrementIdx;
+  HII->getBaseAndOffsetPosition(*MI, BaseIdx, IncrementIdx);
+  return MI->getOperand(IncrementIdx).getImm();
+}
+
+// Opcode filter: only loads/stores accepted here are candidates for widening.
+//
+// Store mode accepts the immediate byte/halfword/word stores and the register
+// word store in base+offset form, plus the post-increment word store, as long
+// as the base-address operand is a register.
+// Load mode accepts only 32-bit loads (base+offset and post-increment): 8- and
+// 16-bit loads would need 2x instructions to replace x loads, because the
+// right bits would have to be selected with AND & SHIFT ops. A load must also
+// carry a memory operand.
+inline bool HexagonLoadStoreWidening::handledInstType(const MachineInstr *MI) {
+  const auto IsRegOperand = [MI](unsigned Idx) {
+    return MI->getOperand(Idx).isReg();
+  };
+  const unsigned Opcode = MI->getOpcode();
+
+  if (Mode != WideningMode::Store) {
+    const bool IsOffsetLoad = Opcode == Hexagon::L2_loadri_io;
+    const bool IsPostIncLoad = Opcode == Hexagon::L2_loadri_pi;
+    if (!IsOffsetLoad && !IsPostIncLoad)
+      return false;
+    if (MI->memoperands_empty())
+      return false;
+    // Base address must be a register; for the base+offset form the offset
+    // must additionally be an immediate.
+    if (IsOffsetLoad)
+      return IsRegOperand(1) && MI->getOperand(2).isImm();
+    return IsRegOperand(2);
+  }
+
+  switch (Opcode) {
+  case Hexagon::S4_storeirb_io:
+  case Hexagon::S4_storeirh_io:
+  case Hexagon::S4_storeiri_io:
+  case Hexagon::S2_storeri_io:
+    // Base address must be a register. (Implement FI later.)
+    return IsRegOperand(0);
+  case Hexagon::S2_storeri_pi:
+    return IsRegOperand(1);
+  default:
+    return false;
+  }
+}
+
+/// Records the registers touched by \p MI: every register operand that is a
+/// def is inserted into \p RegDefs, and every register operand that reads its
+/// register is inserted into \p RegUses. Non-register operands are ignored.
+static void addDefsUsesToList(const MachineInstr *MI,
+                              DenseSet<Register> &RegDefs,
+                              DenseSet<Register> &RegUses) {
+  for (unsigned Idx = 0, NumOps = MI->getNumOperands(); Idx != NumOps; ++Idx) {
+    const MachineOperand &MO = MI->getOperand(Idx);
+    if (!MO.isReg())
+      continue;
+
+    const Register Reg = MO.getReg();
+    if (MO.isDef())
+      RegDefs.insert(Reg);
+    if (MO.readsReg())
+      RegUses.insert(Reg);
+  }
+}
+
+bool HexagonLoadStoreWidening::canSwapInstructions(const MachineInstr *A,
+ const MachineInstr *B) {
+ DenseSet<Register> ARegDefs;
+ DenseSet<Register> ARegUses;
+ addDefsUsesToList(A, ARegDefs, ARegUses);
+ if (A->mayLoadOrStore() && B->mayLoadOrStore() &&
+ (A->mayStore() || B->mayStore()) && A->mayAlias(AA, *B, true))
+ return false;
+ for (const auto &BOp : B->operands()) {
+ if (!BOp.isReg())
+ continue;
+ if ((BOp.isDef() || BOp.readsReg()) && ARegDefs.contains(BOp.getReg()))
+ return false;
+ if (BOp.isDef() && ARegUses.contains(BOp.getReg()))
+ return false;
+ }
+ return true;
----------------
androm3da wrote:
Are there any ordering dependencies here that should be checked with `LLVM_ENABLE_REVERSE_ITERATION` enabled?
https://github.com/llvm/llvm-project/pull/116330
More information about the llvm-commits
mailing list