[llvm] [RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc (PR #88295)

Mon Apr 22 07:43:55 PDT 2024

================
@@ -1682,3 +1653,121 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
   return new RISCVInsertVSETVLI();
 }
+
+// Now that all vsetvlis are explicit, go through and do block local
+// DSE and peephole based demanded fields based transforms.  Note that
+// this *must* be done outside the main dataflow so long as we allow
+// any cross block analysis within the dataflow.  We can't have both
+// demanded fields based mutation and non-local analysis in the
+// dataflow at the same time without introducing inconsistencies.
+bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
+  // Skip if the vector extension is not enabled.
+  auto *ST = &MF.getSubtarget<RISCVSubtarget>();
+  if (!ST->hasVInstructions())
+    return false;
+
+  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+  bool Changed = false;
+
+  const auto *TII = ST->getInstrInfo();
+  auto *MRI = &MF.getRegInfo();
+  for (MachineBasicBlock &MBB : MF) {
+    MachineInstr *NextMI = nullptr;
+    // We can have arbitrary code in successors, so VL and VTYPE
+    // must be considered demanded.
+    DemandedFields Used;
+    Used.demandVL();
+    Used.demandVTYPE();
+    SmallVector<MachineInstr *> ToDelete;
+    for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+
+      if (!isVectorConfigInstr(MI)) {
+        doUnion(Used, getDemanded(MI, MRI, ST));
+        continue;
+      }
+
+      Register VRegDef = MI.getOperand(0).getReg();
+      if (VRegDef != RISCV::X0 &&
+          !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
+        Used.demandVL();
+
+      if (NextMI) {
+        if (!Used.usedVL() && !Used.usedVTYPE()) {
+          ToDelete.push_back(&MI);
+          // Leave NextMI unchanged
+          continue;
+        } else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
+          if (!isVLPreservingConfig(*NextMI)) {
+
+            Register DefReg = NextMI->getOperand(0).getReg();
+
+            MI.getOperand(0).setReg(DefReg);
+            MI.getOperand(0).setIsDead(false);
+
+            // The def of DefReg moved to MI, so extend the LiveInterval up to
+            // it.
+            if (DefReg.isVirtual()) {
+              LiveInterval &DefLI = LIS.getInterval(DefReg);
+              SlotIndex MISlot = LIS.getInstructionIndex(MI).getRegSlot();
+              VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
+              LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
+              DefLI.addSegment(S);
+              DefVNI->def = MISlot;
+
+              // DefReg may have had no uses, in which case we need to shrink
+              // the LiveInterval up to MI.
+              LIS.shrinkToUses(&DefLI);
----------------
lukel97 wrote:

Thanks for catching this; resetting the weight to 0 does the trick. I can't find any other targets that work around this, but I presume this is a snag that we only run into when doing split register allocation (RA).

https://github.com/llvm/llvm-project/pull/88295