[llvm] 18db29e - [PowerPC] Add peephole to remove redundant accumulator prime/unprime instructions

Baptiste Saleil via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 18 13:01:26 PST 2020


Author: Baptiste Saleil
Date: 2020-11-18T15:01:07-06:00
New Revision: 18db29ea6fb6231221da412dbf07c186401bc496

URL: https://github.com/llvm/llvm-project/commit/18db29ea6fb6231221da412dbf07c186401bc496
DIFF: https://github.com/llvm/llvm-project/commit/18db29ea6fb6231221da412dbf07c186401bc496.diff

LOG: [PowerPC] Add peephole to remove redundant accumulator prime/unprime instructions

In some situations, the compiler may insert an accumulator prime instruction and
an accumulator unprime instruction with no use of that accumulator between the two.
That's for example the case when we store an accumulator after assembling it or
restoring it. This patch adds a peephole to remove these prime and unprime instructions.

Differential Revision: https://reviews.llvm.org/D91386

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
    llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
    llvm/test/CodeGen/PowerPC/mma-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 7d1282df369b..04749cdd61c4 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -349,6 +349,64 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
       return MadeChange;
     }
 
+    // Removes redundant accumulator prime (XXMTACC) / unprime (XXMFACC)
+    // pairs from MBB and returns true if any instruction was erased. The
+    // compiler can emit a prime immediately followed by an unprime (e.g. when
+    // an accumulator is stored right after being restored from a spill); if
+    // no other instruction touches the accumulator in between, both can go.
+    // The scan is a single forward pass over the block: each XXMTACC becomes
+    // the candidate for its accumulator, any other instruction with that
+    // accumulator as an operand (use or def) clears the candidate, and an
+    // XXMFACC that finds a live candidate marks itself and that XXMTACC for
+    // erasure. A matched candidate is not cleared, so a further XXMFACC of
+    // the same accumulator with no intervening operand use is erased too.
+    // Marked instructions are erased only after the scan has completed.
+    bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
+      DenseSet<MachineInstr *> InstrsToErase;
+      // Indexed by Acc - PPC::ACC0; nullptr means no candidate XXMTACC yet.
+      SmallVector<MachineInstr *, 8> Candidates(
+          PPC::UACCRCRegClass.getNumRegs(), nullptr);
+
+      for (MachineInstr &BBI : MBB.instrs()) {
+        unsigned Opc = BBI.getOpcode();
+        // A prime becomes the candidate for its accumulator, replacing any
+        // earlier candidate recorded for the same register.
+        if (Opc == PPC::XXMTACC) {
+          Register Acc = BBI.getOperand(0).getReg();
+          assert(PPC::ACCRCRegClass.contains(Acc) &&
+                 "Unexpected register for XXMTACC");
+          Candidates[Acc - PPC::ACC0] = &BBI;
+        }
+        // An unprime with a recorded candidate marks both instructions for
+        // erasure; with no candidate it is left untouched.
+        else if (Opc == PPC::XXMFACC) {
+          Register Acc = BBI.getOperand(0).getReg();
+          assert(PPC::ACCRCRegClass.contains(Acc) &&
+                 "Unexpected register for XXMFACC");
+          if (!Candidates[Acc - PPC::ACC0])
+            continue;
+          InstrsToErase.insert(&BBI);
+          InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
+        }
+        // Any other instruction that has an accumulator register among its
+        // operands invalidates the candidate for that accumulator.
+        else {
+          for (MachineOperand &Operand : BBI.operands()) {
+            if (!Operand.isReg())
+              continue;
+            Register Reg = Operand.getReg();
+            if (PPC::ACCRCRegClass.contains(Reg))
+              Candidates[Reg - PPC::ACC0] = nullptr;
+          }
+        }
+      }
+
+      for (MachineInstr *MI : InstrsToErase)
+        MI->eraseFromParent();
+      NumRemovedInPreEmit += InstrsToErase.size();
+      return !InstrsToErase.empty();
+    }
+
     bool runOnMachineFunction(MachineFunction &MF) override {
       if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
         // Remove UNENCODED_NOP even when this pass is disabled.
@@ -370,6 +428,7 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
       for (MachineBasicBlock &MBB : MF) {
         Changed |= removeRedundantLIs(MBB, TRI);
         Changed |= addLinkerOpt(MBB, TRI);
+        Changed |= removeAccPrimeUnprime(MBB);
         for (MachineInstr &MI : MBB) {
           unsigned Opc = MI.getOpcode();
           if (Opc == PPC::UNENCODED_NOP) {

diff  --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index 0b19fc4ab446..99e731c9127f 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -16,8 +16,6 @@ define void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PAIRED-NEXT:    plxv vs0, f@PCREL+112(0), 1
 ; LE-PAIRED-NEXT:    plxv vs3, f@PCREL+64(0), 1
 ; LE-PAIRED-NEXT:    plxv vs2, f@PCREL+80(0), 1
-; LE-PAIRED-NEXT:    xxmtacc acc0
-; LE-PAIRED-NEXT:    xxmfacc acc0
 ; LE-PAIRED-NEXT:    pstxv vs0, f@PCREL+176(0), 1
 ; LE-PAIRED-NEXT:    pstxv vs1, f@PCREL+160(0), 1
 ; LE-PAIRED-NEXT:    pstxv vs2, f@PCREL+144(0), 1
@@ -32,8 +30,6 @@ define void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-NEXT:    lxv vs0, 64(r3)
 ; BE-PAIRED-NEXT:    lxv vs3, 112(r3)
 ; BE-PAIRED-NEXT:    lxv vs2, 96(r3)
-; BE-PAIRED-NEXT:    xxmtacc acc0
-; BE-PAIRED-NEXT:    xxmfacc acc0
 ; BE-PAIRED-NEXT:    stxv vs1, 144(r3)
 ; BE-PAIRED-NEXT:    stxv vs0, 128(r3)
 ; BE-PAIRED-NEXT:    stxv vs3, 176(r3)
@@ -58,8 +54,6 @@ define void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PAIRED-NEXT:    lxvx vs3, r5, r3
 ; LE-PAIRED-NEXT:    lxv vs2, 16(r6)
 ; LE-PAIRED-NEXT:    sldi r3, r4, 6
-; LE-PAIRED-NEXT:    xxmtacc acc0
-; LE-PAIRED-NEXT:    xxmfacc acc0
 ; LE-PAIRED-NEXT:    stxvx vs3, r5, r3
 ; LE-PAIRED-NEXT:    add r3, r5, r3
 ; LE-PAIRED-NEXT:    stxv vs0, 48(r3)
@@ -78,8 +72,6 @@ define void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; BE-PAIRED-NEXT:    lxv vs1, 16(r6)
 ; BE-PAIRED-NEXT:    lxv vs3, 48(r6)
 ; BE-PAIRED-NEXT:    lxv vs2, 32(r6)
-; BE-PAIRED-NEXT:    xxmtacc acc0
-; BE-PAIRED-NEXT:    xxmfacc acc0
 ; BE-PAIRED-NEXT:    stxvx vs0, r5, r3
 ; BE-PAIRED-NEXT:    add r3, r5, r3
 ; BE-PAIRED-NEXT:    stxv vs1, 16(r3)
@@ -101,8 +93,6 @@ define void @testUnalignedLdSt() {
 ; LE-PAIRED-NEXT:    plxv vs0, f@PCREL+59(0), 1
 ; LE-PAIRED-NEXT:    plxv vs3, f@PCREL+11(0), 1
 ; LE-PAIRED-NEXT:    plxv vs2, f@PCREL+27(0), 1
-; LE-PAIRED-NEXT:    xxmtacc acc0
-; LE-PAIRED-NEXT:    xxmfacc acc0
 ; LE-PAIRED-NEXT:    pstxv vs0, f@PCREL+67(0), 1
 ; LE-PAIRED-NEXT:    pstxv vs1, f@PCREL+51(0), 1
 ; LE-PAIRED-NEXT:    pstxv vs2, f@PCREL+35(0), 1
@@ -122,8 +112,6 @@ define void @testUnalignedLdSt() {
 ; BE-PAIRED-NEXT:    li r4, 59
 ; BE-PAIRED-NEXT:    lxvx vs3, r3, r4
 ; BE-PAIRED-NEXT:    li r4, 35
-; BE-PAIRED-NEXT:    xxmtacc acc0
-; BE-PAIRED-NEXT:    xxmfacc acc0
 ; BE-PAIRED-NEXT:    stxvx vs1, r3, r4
 ; BE-PAIRED-NEXT:    li r4, 19
 ; BE-PAIRED-NEXT:    stxvx vs0, r3, r4

diff  --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 0eb633ab3f2c..cbc7bd9cd0d7 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -16,8 +16,6 @@ define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-NEXT:    xxlor vs1, v3, v3
 ; CHECK-NEXT:    xxlor vs2, v2, v2
 ; CHECK-NEXT:    xxlor vs3, v3, v3
-; CHECK-NEXT:    xxmtacc acc0
-; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r3)
 ; CHECK-NEXT:    stxv vs1, 32(r3)
 ; CHECK-NEXT:    stxv vs2, 16(r3)
@@ -31,8 +29,6 @@ define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-BE-NEXT:    xxlor vs1, v3, v3
 ; CHECK-BE-NEXT:    xxlor vs2, v2, v2
 ; CHECK-BE-NEXT:    xxlor vs3, v3, v3
-; CHECK-BE-NEXT:    xxmtacc acc0
-; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
@@ -77,8 +73,6 @@ define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-NEXT:    xxlor vs2, v2, v2
 ; CHECK-NEXT:    xxlor vs3, v3, v3
 ; CHECK-NEXT:    xxmtacc acc0
-; CHECK-NEXT:    xxmtacc acc0
-; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r3)
 ; CHECK-NEXT:    stxv vs1, 32(r3)
 ; CHECK-NEXT:    stxv vs2, 16(r3)
@@ -93,8 +87,6 @@ define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-BE-NEXT:    xxlor vs2, v2, v2
 ; CHECK-BE-NEXT:    xxlor vs3, v3, v3
 ; CHECK-BE-NEXT:    xxmtacc acc0
-; CHECK-BE-NEXT:    xxmtacc acc0
-; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
@@ -119,9 +111,6 @@ define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-NEXT:    xxlor vs1, v3, v3
 ; CHECK-NEXT:    xxlor vs2, v2, v2
 ; CHECK-NEXT:    xxlor vs3, v3, v3
-; CHECK-NEXT:    xxmtacc acc0
-; CHECK-NEXT:    xxmfacc acc0
-; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r3)
 ; CHECK-NEXT:    stxv vs1, 32(r3)
 ; CHECK-NEXT:    stxv vs2, 16(r3)
@@ -135,9 +124,6 @@ define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-BE-NEXT:    xxlor vs1, v3, v3
 ; CHECK-BE-NEXT:    xxlor vs2, v2, v2
 ; CHECK-BE-NEXT:    xxlor vs3, v3, v3
-; CHECK-BE-NEXT:    xxmtacc acc0
-; CHECK-BE-NEXT:    xxmfacc acc0
-; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
@@ -262,8 +248,6 @@ define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
 ; CHECK-NEXT:    xvi4ger8pp acc0, v2, v2
 ; CHECK-NEXT:  .LBB7_3: # %if.end
 ; CHECK-NEXT:    xxmfacc acc0
-; CHECK-NEXT:    xxmtacc acc0
-; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r3)
 ; CHECK-NEXT:    stxv vs1, 32(r3)
 ; CHECK-NEXT:    stxv vs2, 16(r3)
@@ -286,8 +270,6 @@ define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
 ; CHECK-BE-NEXT:    xvi4ger8pp acc0, v2, v2
 ; CHECK-BE-NEXT:  .LBB7_3: # %if.end
 ; CHECK-BE-NEXT:    xxmfacc acc0
-; CHECK-BE-NEXT:    xxmtacc acc0
-; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
@@ -637,8 +619,6 @@ define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind
 ; CHECK-NEXT:    lxvp vsp0, r1(r3)
 ; CHECK-NEXT:    li r3, 32
 ; CHECK-NEXT:    lxvp vsp2, r1(r3)
-; CHECK-NEXT:    xxmtacc acc0
-; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 112(r30)
 ; CHECK-NEXT:    stxv vs1, 96(r30)
 ; CHECK-NEXT:    stxv vs2, 80(r30)
@@ -675,8 +655,6 @@ define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind
 ; CHECK-BE-NEXT:    lxvp vsp0, r1(r3)
 ; CHECK-BE-NEXT:    li r3, 144
 ; CHECK-BE-NEXT:    lxvp vsp2, r1(r3)
-; CHECK-BE-NEXT:    xxmtacc acc0
-; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs3, 112(r30)
 ; CHECK-BE-NEXT:    stxv vs2, 96(r30)
 ; CHECK-BE-NEXT:    stxv vs1, 80(r30)


        


More information about the llvm-commits mailing list