[llvm] [RISCV] Implement cross basic block VXRM write insertion. (PR #70382)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 13:36:46 PDT 2023
================
@@ -0,0 +1,457 @@
+//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts writes to the VXRM CSR as needed by vector instructions.
+// Each instruction that uses VXRM carries an operand that contains its required
+// VXRM value. This pass tries to optimize placement to avoid redundant writes
+// to VXRM.
+//
+// This is done using two dataflow algorithms. The first is a forward dataflow
+// that calculates where a VXRM value is available. The second is a backward
+// dataflow that determines where a VXRM value is anticipated.
+//
+// Finally, we use the results of these two dataflows to insert VXRM writes
+// where a value is anticipated, but not available.
+//
+// FIXME: This pass does not split critical edges, so there can still be some
+// redundancy.
+//
+// FIXME: If we are willing to have writes that aren't always needed, we could
+// reduce the number of VXRM writes in some cases.
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <queue>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-insert-write-vxrm"
+#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
+
+namespace {
+
+/// Describes what is known about the VXRM value at a single program point.
+/// A value is in exactly one of three states: Uninitialized (nothing has been
+/// recorded yet), Static (a specific immediate in the range 0-3), or Unknown.
+class VXRMInfo {
+  uint8_t VXRMImm = 0;
+
+  enum : uint8_t {
+    Uninitialized,
+    Static,
+    Unknown,
+  } State = Uninitialized;
+
+public:
+  VXRMInfo() {}
+
+  /// Builds a value that is already in the Unknown state.
+  static VXRMInfo getUnknown() {
+    VXRMInfo Result;
+    Result.State = Unknown;
+    return Result;
+  }
+
+  bool isValid() const { return State != Uninitialized; }
+  bool isUnknown() const { return State == Unknown; }
+  bool isStatic() const { return State == Static; }
+
+  void setUnknown() { State = Unknown; }
+
+  /// Records \p Imm as the known VXRM immediate and moves to the Static state.
+  void setVXRMImm(unsigned Imm) {
+    assert(Imm <= 3 && "Unexpected VXRM value");
+    State = Static;
+    VXRMImm = Imm;
+  }
+
+  /// Returns the recorded immediate. Only legal in the Static state.
+  unsigned getVXRMImm() const {
+    assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
+    return VXRMImm;
+  }
+
+  bool operator==(const VXRMInfo &Other) const {
+    // Values in different states are never equal: Uninitialized only matches
+    // Uninitialized and Unknown only matches Unknown.
+    if (State != Other.State)
+      return false;
+
+    // Same state: the immediate only matters when both sides are Static.
+    return State != Static || VXRMImm == Other.VXRMImm;
+  }
+
+  bool operator!=(const VXRMInfo &Other) const { return !operator==(Other); }
+
+  /// Merges this value with \p Other as if both flowed into the same block
+  /// from two different neighbours.
+  VXRMInfo intersect(const VXRMInfo &Other) const {
+    // An uninitialized incoming value contributes nothing.
+    if (!Other.isValid())
+      return *this;
+
+    // Nothing accumulated so far, so the incoming value is the first one seen.
+    if (!isValid())
+      return Other;
+
+    // Both sides are valid here. They only agree when both are Static with the
+    // same immediate; any Unknown side or a mismatch degrades to Unknown.
+    const bool SameStaticValue =
+        isStatic() && Other.isStatic() && VXRMImm == Other.VXRMImm;
+    return SameStaticValue ? *this : VXRMInfo::getUnknown();
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Support for debugging, callable in GDB: V->dump()
+  LLVM_DUMP_METHOD void dump() const {
+    print(dbgs());
+    dbgs() << "\n";
+  }
+
+  /// Writes the value to \p OS wrapped in braces: the state name for
+  /// Uninitialized/Unknown, otherwise the numeric immediate.
+  void print(raw_ostream &OS) const {
+    OS << '{';
+    if (isStatic())
+      OS << getVXRMImm();
+    else if (isUnknown())
+      OS << "Unknown";
+    else
+      OS << "Uninitialized";
+    OS << '}';
+  }
+#endif
+};
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// Stream insertion for VXRMInfo; forwards to VXRMInfo::print so the value can
+/// be used directly in debug output.
+LLVM_ATTRIBUTE_USED
+inline raw_ostream &operator<<(raw_ostream &Stream, const VXRMInfo &Info) {
+  Info.print(Stream);
+  return Stream;
+}
+#endif
+
+/// Per-basic-block state shared by the VXRM dataflow computations and the
+/// final write-insertion step.
+struct BlockData {
+  // Indicates if the block uses VXRM. Uninitialized means no use.
+  VXRMInfo VXRMUse;
+
+  // Indicates the VXRM output from the block. Uninitialized means transparent.
+  VXRMInfo VXRMOut;
+
+  // Keeps track of the available VXRM value at the start of the basic block.
+  VXRMInfo AvailableIn;
+
+  // Keeps track of the available VXRM value at the end of the basic block.
+  VXRMInfo AvailableOut;
+
+  // Keeps track of what VXRM is anticipated at the start of the basic block.
+  VXRMInfo AnticipatedIn;
+
+  // Keeps track of what VXRM is anticipated at the end of the basic block.
+  VXRMInfo AnticipatedOut;
+
+  // Keeps track of whether the block is already in the queue. Explicitly
+  // initialized so that a default-initialized BlockData never carries an
+  // indeterminate flag, regardless of how the owning container creates it.
+  bool InQueue = false;
+
+  BlockData() = default;
+};
+
+/// Machine function pass that inserts writes to the VXRM CSR ahead of the
+/// instructions that carry a VXRM operand.
+class RISCVInsertWriteVXRM : public MachineFunctionPass {
+  // Used to look up the WriteVXRMImm instruction description when emitting
+  // writes. Nothing in this class body assigns it, so start from a
+  // well-defined null instead of an indeterminate pointer.
+  const TargetInstrInfo *TII = nullptr;
+
+  // Per-block state, indexed by MachineBasicBlock::getNumber().
+  std::vector<BlockData> BlockInfo;
+
+  // Blocks waiting to be (re)visited by computeAvailable/computeAnticipated.
+  std::queue<const MachineBasicBlock *> WorkList;
+
+public:
+  static char ID;
+
+  RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {
+    initializeRISCVInsertWriteVXRMPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  /// Declares that this pass leaves the CFG intact.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override {
+    return RISCV_INSERT_WRITE_VXRM_NAME;
+  }
+
+private:
+  /// Records the block-local VXRM use/output for \p MBB; returns true if the
+  /// block contains an instruction with a VXRM operand.
+  bool computeVXRMChanges(const MachineBasicBlock &MBB);
+
+  /// Forward step: recomputes AvailableIn/AvailableOut for \p MBB and queues
+  /// its successors when AvailableOut changes.
+  void computeAvailable(const MachineBasicBlock &MBB);
+
+  /// Backward step: recomputes AnticipatedOut/AnticipatedIn for \p MBB and
+  /// queues its predecessors when AnticipatedIn changes.
+  void computeAnticipated(const MachineBasicBlock &MBB);
+
+  /// Inserts the WriteVXRMImm instructions needed inside \p MBB.
+  void emitWriteVXRM(MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+char RISCVInsertWriteVXRM::ID = 0; // Handed to MachineFunctionPass(ID) by the constructor.
+
+INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
+ false, false) // NOTE(review): presumably the cfg-only / is-analysis flags, both off; confirm against PassSupport.h.
+
+/// Scans \p MBB once and records its block-local VXRM behaviour in BlockInfo.
+///
+/// VXRMUse is set by the first relevant instruction in the block and VXRMOut
+/// by the last one. An instruction with a VXRM operand contributes its
+/// immediate; a call, inline asm, or any instruction that modifies VXRM
+/// contributes Unknown.
+///
+/// \returns true if at least one instruction in the block has a VXRM operand.
+bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  bool NeedVXRMWrite = false;
+  for (const MachineInstr &MI : MBB) {
+    int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
+    if (VXRMIdx >= 0) {
+      unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
+
+      // Only the first relevant instruction determines what the block expects
+      // on entry.
+      if (!BBInfo.VXRMUse.isValid())
+        BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
+
+      BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
+      NeedVXRMWrite = true;
+      // Mirrors emitWriteVXRM, which also stops classifying an instruction
+      // once it has been handled as a VXRM user.
+      continue;
+    }
+
+    // Anything that may clobber VXRM makes the value unknown from here on.
+    // The modifiesRegister check keeps this analysis in agreement with
+    // emitWriteVXRM, which treats such instructions as clobbers too; without
+    // it a stale value could be considered available after an explicit write
+    // to VXRM.
+    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM)) {
+      if (!BBInfo.VXRMUse.isValid())
+        BBInfo.VXRMUse.setUnknown();
+
+      BBInfo.VXRMOut.setUnknown();
+    }
+  }
+
+  return NeedVXRMWrite;
+}
+
+/// Forward dataflow step for \p MBB: merges the AvailableOut of every
+/// predecessor into AvailableIn, applies the block's own VXRMOut to obtain
+/// AvailableOut, and re-queues the successors whenever AvailableOut changes.
+void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  // The block is being processed now, so it may be queued again later.
+  BBInfo.InQueue = false;
+
+  // A block without predecessors starts from Unknown; otherwise merge what
+  // each predecessor makes available on exit.
+  VXRMInfo Incoming;
+  if (MBB.pred_empty())
+    Incoming = VXRMInfo::getUnknown();
+  else
+    for (const MachineBasicBlock *Pred : MBB.predecessors())
+      Incoming = Incoming.intersect(BlockInfo[Pred->getNumber()].AvailableOut);
+
+  // No predecessor has produced anything yet; try again once one has.
+  if (!Incoming.isValid())
+    return;
+
+  if (BBInfo.AvailableIn != Incoming) {
+    BBInfo.AvailableIn = Incoming;
+    LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
+                      << " changed to " << BBInfo.AvailableIn << "\n");
+  }
+
+  // A block that sets VXRM overrides whatever flowed in; a transparent block
+  // passes the incoming value through.
+  const VXRMInfo Outgoing =
+      BBInfo.VXRMOut.isValid() ? BBInfo.VXRMOut : Incoming;
+
+  // Nothing new to propagate.
+  if (BBInfo.AvailableOut == Outgoing)
+    return;
+
+  BBInfo.AvailableOut = Outgoing;
+  LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
+                    << " changed to " << BBInfo.AvailableOut << "\n");
+
+  // The exit state changed, so every successor not already queued must be
+  // revisited.
+  for (MachineBasicBlock *Succ : MBB.successors()) {
+    BlockData &SuccInfo = BlockInfo[Succ->getNumber()];
+    if (SuccInfo.InQueue)
+      continue;
+    SuccInfo.InQueue = true;
+    WorkList.push(Succ);
+  }
+}
+
+/// Backward dataflow step for \p MBB: merges the AnticipatedIn of every
+/// successor into AnticipatedOut, applies the block's own VXRMUse to obtain
+/// AnticipatedIn, and re-queues the predecessors whenever AnticipatedIn
+/// changes.
+void RISCVInsertWriteVXRM::computeAnticipated(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  // The block is being processed now, so it may be queued again later.
+  BBInfo.InQueue = false;
+
+  // A block without successors starts from Unknown; otherwise merge what each
+  // successor anticipates on entry.
+  VXRMInfo AtExit;
+  if (MBB.succ_empty())
+    AtExit = VXRMInfo::getUnknown();
+  else
+    for (const MachineBasicBlock *Succ : MBB.successors())
+      AtExit = AtExit.intersect(BlockInfo[Succ->getNumber()].AnticipatedIn);
+
+  // No successor has produced anything yet; try again once one has.
+  if (!AtExit.isValid())
+    return;
+
+  if (BBInfo.AnticipatedOut != AtExit) {
+    BBInfo.AnticipatedOut = AtExit;
+    LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
+                      << " changed to " << BBInfo.AnticipatedOut << "\n");
+  }
+
+  // If the block itself reads VXRM, that use is what is anticipated on entry;
+  // otherwise the value anticipated at the exit passes straight through.
+  const VXRMInfo AtEntry = BBInfo.VXRMUse.isValid() ? BBInfo.VXRMUse : AtExit;
+
+  // Nothing new to propagate.
+  if (BBInfo.AnticipatedIn == AtEntry)
+    return;
+
+  BBInfo.AnticipatedIn = AtEntry;
+  LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
+                    << " changed to " << BBInfo.AnticipatedIn << "\n");
+
+  // The entry state changed, so every predecessor not already queued must be
+  // revisited.
+  for (MachineBasicBlock *Pred : MBB.predecessors()) {
+    BlockData &PredInfo = BlockInfo[Pred->getNumber()];
+    if (PredInfo.InQueue)
+      continue;
+    PredInfo.InQueue = true;
+    WorkList.push(Pred);
+  }
+}
+
+void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
+ const BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+ VXRMInfo Info = BBInfo.AvailableIn;
+
+ // Insert VXRM write if anticipated and not available.
+ if (BBInfo.AnticipatedIn.isStatic()) {
+ bool NeedInsert = false;
+ // If there are no predecessors and the value is anticipated, insert.
+ if (MBB.pred_empty()) {
+ NeedInsert = true;
+ } else {
+ // Search for any predecessors that wouldn't satisfy our requirement and
+ // insert a write VXRM if needed.
+ // NOTE: If one predecessor is able to provide the requirement, but
+ // another isn't, it means we have a critical edge. The better placement
+ // would be to split the critical edge.
+ for (MachineBasicBlock *P : MBB.predecessors()) {
+ const BlockData &PInfo = BlockInfo[P->getNumber()];
+ // If it's available out of the predecessor, then we're ok.
+ if (PInfo.AvailableOut.isStatic() &&
+ PInfo.AvailableOut.getVXRMImm() ==
+ BBInfo.AnticipatedIn.getVXRMImm())
+ continue;
+ // If the predecessor anticipated this value for all its successors,
+ // then it should have already inserted.
+ if (PInfo.AnticipatedOut.isStatic() &&
+ PInfo.AnticipatedOut.getVXRMImm() ==
+ BBInfo.AnticipatedIn.getVXRMImm())
+ continue;
+ NeedInsert = true;
+ break;
+ }
+ }
+
+ if (NeedInsert) {
+ BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(),
+ TII->get(RISCV::WriteVXRMImm))
+ .addImm(BBInfo.AnticipatedIn.getVXRMImm());
+ Info.setVXRMImm(BBInfo.AnticipatedIn.getVXRMImm());
+ }
+ Info = BBInfo.AnticipatedIn;
+ }
+
+ for (MachineInstr &MI : MBB) {
+ int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
+ if (VXRMIdx >= 0) {
+ unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
+
+ if (!Info.isStatic() || Info.getVXRMImm() != NewVXRMImm) {
+ LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
+ .addImm(NewVXRMImm);
+ }
+
+ MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
+ /*IsImp*/ true));
+ Info.setVXRMImm(NewVXRMImm);
+ continue;
+ }
+
+ if (MI.getOpcode() == RISCV::WriteVXRMImm) {
+ Info.setVXRMImm(MI.getOperand(0).getImm());
+ continue;
+ }
+
+ if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VXRM))
----------------
preames wrote:
The modifiesRegister case is missing from computeVXRMChanges.
https://github.com/llvm/llvm-project/pull/70382
More information about the llvm-commits
mailing list