[llvm] [PPC]Optimize zeroing accumulator and spilling instructions into simple instructions (PR #96094)

Chen Zheng via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 24 20:26:51 PDT 2024


================
@@ -109,6 +109,93 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
           MachineFunctionProperties::Property::NoVRegs);
     }
 
+    // The funtion will simply the zeroing accumulator and spilling instrcutions
+    // into simple xxlxor and spilling instrcuctions.
+    // From:
+    // setaccz acci
+    // xxmfacc acci
+    // stxv vsr(i*4+0), D(1)
+    // stxv vsr(i*4+1), D-16(1)
+    // stxv vsr(i*4+2), D-32(1)
+    // stxv vsr(i*4+3), D-48(1)
+
+    // To:
+    // xxlxor vsr(i*4), 0, 0
+    // stxv vsr(i*4), D(1)
+    // stxv vsr(i*4), D-16(1)
+    // stxv vsr(i*4), D-32(1)
+    // stxv vsr(i*4), D-48(1)
+    bool
+    OptimizeZeroingAccumulatorSpilling(MachineBasicBlock &MBB,
+                                       const TargetRegisterInfo *TRI) const {
+      bool changed = false;
+      for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+        if (BBI->getOpcode() != PPC::XXSETACCZ)
+          continue;
+
+        Register ACCZReg = BBI->getOperand(0).getReg();
+
+        DenseSet<MachineInstr *> InstrsToErase;
+        InstrsToErase.insert(&*BBI++);
+
+        if (BBI->getOpcode() != PPC::XXMFACC) {
+	  --BBI;
+          continue;
+	}
+
+        Register ACCWReg = BBI->getOperand(0).getReg();
+
+        if (ACCWReg != ACCZReg) 
+          continue;
+
+        auto XXMFACCInstr = BBI;
+        InstrsToErase.insert(&*BBI++);
+
+        Register VSLRegBase = (ACCWReg - PPC::ACC0) * 4 + PPC::VSL0;
+        bool isVSLRegBaseKilled = false;
+        for (unsigned InstrCount = 0; InstrCount < 4; ++InstrCount, ++BBI) {
+          if (BBI->getOpcode() == PPC::STXV) {
+            Register Reg0 = BBI->getOperand(0).getReg();
+            // If the VSLRegBase Register is killed, we put the kill in the
+            // last STXV instruction.
+            if (Reg0 == VSLRegBase && BBI->getOperand(0).isKill())
+              isVSLRegBaseKilled = true;
+            if (Reg0 < VSLRegBase || Reg0 > VSLRegBase + 3)
+              continue;
+          } else {
+	      --BBI;
+              continue;
+	  }
+          }
+
+          BBI = XXMFACCInstr;
+          BBI++;
+          for (unsigned InstrCount = 0; InstrCount < 4; ++InstrCount, ++BBI) {
+            Register VSLiReg = BBI->getOperand(0).getReg();
+            BBI->substituteRegister(VSLiReg, VSLRegBase, 0, *TRI);
+            BBI->getOperand(0).setIsKill(false);
+          }
+
+          if (isVSLRegBaseKilled)
+            (--BBI)->getOperand(0).setIsKill(true);
+
+          DebugLoc DL = XXMFACCInstr->getDebugLoc();
+          const PPCInstrInfo *TII = XXMFACCInstr->getMF()
+                                        ->getSubtarget<PPCSubtarget>()
+                                        .getInstrInfo();
+
+          BuildMI(MBB, &*XXMFACCInstr, DL, TII->get(PPC::XXLXOR), VSLRegBase)
+              .addReg(VSLRegBase,RegState::Undef)
+              .addReg(VSLRegBase,RegState::Undef);
+
+          for (MachineInstr *MI : InstrsToErase)
+            MI->eraseFromParent();
----------------
chenzheng1030 wrote:

Maybe we can put this function outside of the main loop? i.e., we collect all the unneeded instructions and remove them all just before this function returns.

https://github.com/llvm/llvm-project/pull/96094


More information about the llvm-commits mailing list