[llvm] [RISCV] Implement cross basic block VXRM write insertion. (PR #70382)

Wed Nov 1 13:36:47 PDT 2023

================
@@ -0,0 +1,457 @@
+//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts writes to the VXRM CSR as needed by vector instructions.
+// Each instruction that uses VXRM carries an operand that contains its required
+// VXRM value. This pass tries to optimize placement to avoid redundant writes
+// to VXRM.
+//
+// This is done using 2 dataflow algorithms. The first is a forward data flow
+// to calculate where a VXRM value is available. The second is a backwards
+// dataflow to determine where a VXRM value is anticipated.
+//
+// Finally, we use the results of these two dataflows to insert VXRM writes
+// where a value is anticipated, but not available.
+//
+// FIXME: This pass does not split critical edges, so there can still be some
+// redundancy.
+//
+// FIXME: If we are willing to have writes that aren't always needed, we could
+// reduce the number of VXRM writes in some cases.
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <queue>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-insert-write-vxrm"
+#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
+
+namespace {
+
+class VXRMInfo {
+  uint8_t VXRMImm = 0;
+
+  enum : uint8_t {
+    Uninitialized,
+    Static,
+    Unknown,
+  } State = Uninitialized;
+
+public:
+  VXRMInfo() {}
+
+  static VXRMInfo getUnknown() {
+    VXRMInfo Info;
+    Info.setUnknown();
+    return Info;
+  }
+
+  bool isValid() const { return State != Uninitialized; }
+  void setUnknown() { State = Unknown; }
+  bool isUnknown() const { return State == Unknown; }
+
+  bool isStatic() const { return State == Static; }
+
+  void setVXRMImm(unsigned Imm) {
+    assert(Imm <= 3 && "Unexpected VXRM value");
+    VXRMImm = Imm;
+    State = Static;
+  }
+  unsigned getVXRMImm() const {
+    assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
+    return VXRMImm;
+  }
+
+  bool operator==(const VXRMInfo &Other) const {
+    // Uninitialized is only equal to another Uninitialized.
+    if (!isValid())
+      return !Other.isValid();
+    if (!Other.isValid())
+      return !isValid();
+
+    // Unknown is only equal to another Unknown.
+    if (isUnknown())
+      return Other.isUnknown();
+    if (Other.isUnknown())
+      return isUnknown();
+
+    return VXRMImm == Other.VXRMImm;
+  }
+
+  bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }
+
+  // Calculate the VXRMInfo visible to a block assuming this and Other are
+  // both predecessors.
+  VXRMInfo intersect(const VXRMInfo &Other) const {
+    // If the new value isn't valid, ignore it.
+    if (!Other.isValid())
+      return *this;
+
+    // If this value isn't valid, this must be the first predecessor, use it.
+    if (!isValid())
+      return Other;
+
+    // If either is unknown, the result is unknown.
+    if (isUnknown() || Other.isUnknown())
+      return VXRMInfo::getUnknown();
+
+    // If we have an exact match, return this.
+    if (*this == Other)
+      return *this;
+
+    // Otherwise the result is unknown.
+    return VXRMInfo::getUnknown();
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Support for debugging, callable in GDB: V->dump()
+  LLVM_DUMP_METHOD void dump() const {
+    print(dbgs());
+    dbgs() << "\n";
+  }
+
+  void print(raw_ostream &OS) const {
+    OS << '{';
+    if (!isValid())
+      OS << "Uninitialized";
+    else if (isUnknown())
+      OS << "Unknown";
+    else
+      OS << getVXRMImm();
+    OS << '}';
+  }
+#endif
+};
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_ATTRIBUTE_USED
+inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
+  V.print(OS);
+  return OS;
+}
+#endif
+
+struct BlockData {
+  // Indicates if the block uses VXRM. Uninitialized means no use.
+  VXRMInfo VXRMUse;
+
+  // Indicates the VXRM output from the block. Unitialized means transparent.
+  VXRMInfo VXRMOut;
+
+  // Keeps track of the available VXRM value at the start of the basic bloc.
+  VXRMInfo AvailableIn;
+
+  // Keeps track of the available VXRM value at the end of the basic block.
+  VXRMInfo AvailableOut;
+
+  // Keeps track of what VXRM is anticipated at the start of the basic block.
+  VXRMInfo AnticipatedIn;
+
+  // Keeps track of what VXRM is anticipated at the end of the basic block.
+  VXRMInfo AnticipatedOut;
+
+  // Keeps track of whether the block is already in the queue.
+  bool InQueue;
+
+  BlockData() = default;
+};
+
+class RISCVInsertWriteVXRM : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+
+  std::vector<BlockData> BlockInfo;
+  std::queue<const MachineBasicBlock *> WorkList;
+
+public:
+  static char ID;
+
+  RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {
+    initializeRISCVInsertWriteVXRMPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override {
+    return RISCV_INSERT_WRITE_VXRM_NAME;
+  }
+
+private:
+  bool computeVXRMChanges(const MachineBasicBlock &MBB);
+  void computeAvailable(const MachineBasicBlock &MBB);
+  void computeAnticipated(const MachineBasicBlock &MBB);
+  void emitWriteVXRM(MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+char RISCVInsertWriteVXRM::ID = 0;
+
+INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
+                false, false)
+
+bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  bool NeedVXRMWrite = false;
+  for (const MachineInstr &MI : MBB) {
+    int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
+    if (VXRMIdx >= 0) {
+      unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
+
+      if (!BBInfo.VXRMUse.isValid())
+        BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
+
+      BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
+      NeedVXRMWrite = true;
+    }
+
+    if (MI.isCall() || MI.isInlineAsm()) {
+      if (!BBInfo.VXRMUse.isValid())
+        BBInfo.VXRMUse.setUnknown();
+
+      BBInfo.VXRMOut.setUnknown();
+    }
+  }
+
+  return NeedVXRMWrite;
+}
+
+void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  BBInfo.InQueue = false;
+
+  VXRMInfo Available;
+  if (MBB.pred_empty()) {
+    Available.setUnknown();
+  } else {
+    for (const MachineBasicBlock *P : MBB.predecessors())
+      Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut);
+  }
+
+  // If we don't have any valid available info, wait until we do.
+  if (!Available.isValid())
+    return;
+
+  if (Available != BBInfo.AvailableIn) {
+    BBInfo.AvailableIn = Available;
+    LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
+                      << " changed to " << BBInfo.AvailableIn << "\n");
+  }
+
+  if (BBInfo.VXRMOut.isValid())
+    Available = BBInfo.VXRMOut;
+
+  if (Available == BBInfo.AvailableOut)
+    return;
+
+  BBInfo.AvailableOut = Available;
+  LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
+                    << " changed to " << BBInfo.AvailableOut << "\n");
+
+  // Add the successors to the work list so that we can propagate.
+  for (MachineBasicBlock *S : MBB.successors()) {
+    if (!BlockInfo[S->getNumber()].InQueue) {
+      BlockInfo[S->getNumber()].InQueue = true;
+      WorkList.push(S);
+    }
+  }
+}
+
+void RISCVInsertWriteVXRM::computeAnticipated(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  BBInfo.InQueue = false;
+
+  VXRMInfo Anticipated;
+  if (MBB.succ_empty()) {
+    Anticipated.setUnknown();
+  } else {
+    for (const MachineBasicBlock *S : MBB.successors())
+      Anticipated =
+          Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn);
+  }
+
+  // If we don't have any valid anticipated info, wait until we do.
+  if (!Anticipated.isValid())
+    return;
+
+  if (Anticipated != BBInfo.AnticipatedOut) {
+    BBInfo.AnticipatedOut = Anticipated;
+    LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
+                      << " changed to " << BBInfo.AnticipatedOut << "\n");
+  }
+
+  // If this block reads VXRM, copy it.
+  if (BBInfo.VXRMUse.isValid())
+    Anticipated = BBInfo.VXRMUse;
+
+  if (Anticipated == BBInfo.AnticipatedIn)
+    return;
+
+  BBInfo.AnticipatedIn = Anticipated;
+  LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
+                    << " changed to " << BBInfo.AnticipatedIn << "\n");
+
+  // Add the predecessors to the work list so that we can propagate.
+  for (MachineBasicBlock *P : MBB.predecessors()) {
+    if (!BlockInfo[P->getNumber()].InQueue) {
+      BlockInfo[P->getNumber()].InQueue = true;
+      WorkList.push(P);
+    }
+  }
+}
+
+void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
+  const BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  VXRMInfo Info = BBInfo.AvailableIn;
+
+  // Insert VXRM write if anticipated and not available.
+  if (BBInfo.AnticipatedIn.isStatic()) {
+    bool NeedInsert = false;
+    // If there no predecessors and the value is anticipated, insert.
+    if (MBB.pred_empty()) {
+      NeedInsert = true;
+    } else {
+      // Search for any predecessors that wouldn't satisfy our requirement and
+      // insert a write VXRM if needed.
+      // NOTE: If one predecessor is able to provide the requirement, but
+      // another isn't, it means we have a critical edge. The better placement
+      // would be to split the critical edge.
+      for (MachineBasicBlock *P : MBB.predecessors()) {
+        const BlockData &PInfo = BlockInfo[P->getNumber()];
+        // If it's available out of the predecessor, then we're ok.
+        if (PInfo.AvailableOut.isStatic() &&
+            PInfo.AvailableOut.getVXRMImm() ==
+                BBInfo.AnticipatedIn.getVXRMImm())
+          continue;
+        // If the predecessor anticipated this value for all its succesors,
+        // then it should have already inserted.
----------------
preames wrote:

Can you reword this comment?  "Should have already" implies it must have run first.  However, since the visit order isn't defined for the phase 4 loop, the work may not have actually been done yet.  

https://github.com/llvm/llvm-project/pull/70382