[llvm] r261004 - [AArch64] Add pass to remove redundant copy after RA

Jun Bum Lim via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 16 12:02:39 PST 2016


Author: junbuml
Date: Tue Feb 16 14:02:39 2016
New Revision: 261004

URL: http://llvm.org/viewvc/llvm-project?rev=261004&view=rev
Log:
[AArch64] Add pass to remove redundant copy after RA

Summary:
This change will add a pass to remove unnecessary zero copies in target blocks
of cbz/cbnz instructions. E.g., the copy instruction in the code below can be
removed because the cbz jumps to BB1 when x0 is zero:
  BB0:
    cbz x0, .BB1
  BB1:
    mov x0, xzr

Jun

Reviewers: gberry, jmolloy, HaoLiu, MatzeB, mcrosier

Subscribers: mcrosier, mssimpso, haicheng, bmakam, llvm-commits, aemerson, rengolin

Differential Revision: http://reviews.llvm.org/D16203

Added:
    llvm/trunk/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
    llvm/trunk/test/CodeGen/AArch64/machine-copy-remove.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64.h
    llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
    llvm/trunk/lib/Target/AArch64/CMakeLists.txt

Modified: llvm/trunk/lib/Target/AArch64/AArch64.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.h?rev=261004&r1=261003&r2=261004&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.h Tue Feb 16 14:02:39 2016
@@ -27,6 +27,7 @@ class FunctionPass;
 class MachineFunctionPass;
 
 FunctionPass *createAArch64DeadRegisterDefinitions();
+FunctionPass *createAArch64RedundantCopyEliminationPass();
 FunctionPass *createAArch64ConditionalCompares();
 FunctionPass *createAArch64AdvSIMDScalar();
 FunctionPass *createAArch64BranchRelaxation();

Added: llvm/trunk/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp?rev=261004&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp (added)
+++ llvm/trunk/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp Tue Feb 16 14:02:39 2016
@@ -0,0 +1,170 @@
+//=- AArch64RedundantCopyElimination.cpp - Remove useless copy for AArch64 -=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This pass removes unnecessary zero copies in BBs that are targets of
+// cbz/cbnz instructions. For instance, the copy instruction in the code below
+// can be removed because the CBZW jumps to BB#2 when W0 is zero.
+//  BB#1:
+//    CBZW %W0, <BB#2>
+//  BB#2:
+//    %W0 = COPY %WZR
+// This pass should be run after register allocation.
+//
+// FIXME: This should be extended to handle any constant other than zero. E.g.,
+//   cmp w0, #1
+//     b.eq .BB1
+//   BB1:
+//     mov w0, #1
+//
+// FIXME: This could also be extended to check the whole dominance subtree below
+// the comparison if the compile time regression is acceptable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-copyelim"
+
+STATISTIC(NumCopiesRemoved, "Number of copies removed.");
+
+namespace llvm {
+void initializeAArch64RedundantCopyEliminationPass(PassRegistry &);
+}
+
+namespace {
+/// Post-RA machine function pass that deletes copies of WZR/XZR into a
+/// register that is already known to be zero because the block is only
+/// reached through the zero edge of a cbz/cbnz testing that register.
+class AArch64RedundantCopyElimination : public MachineFunctionPass {
+  const MachineRegisterInfo *MRI; // Used to skip reserved destination regs.
+  const TargetRegisterInfo *TRI;  // Used for alias/sub-register queries.
+
+public:
+  static char ID;
+  AArch64RedundantCopyElimination() : MachineFunctionPass(ID) {}
+  // Removes redundant zero copies at the head of MBB; returns true on change.
+  bool optimizeCopy(MachineBasicBlock *MBB);
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  const char *getPassName() const override {
+    return "AArch64 Redundant Copy Elimination";
+  }
+};
+char AArch64RedundantCopyElimination::ID = 0;
+}
+
+// Register the pass so it can be referenced by name ("aarch64-copyelim"),
+// e.g. with -run-pass or -print-before/-print-after.
+INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim",
+                "AArch64 redundant copy elimination pass", false, false)
+
+// Try to remove redundant zero copies at the start of MBB.
+//
+// Precondition for the transformation: MBB has a unique predecessor that
+// ends in a cbz/cbnz, and MBB is reached only along the edge on which the
+// tested register is zero (the branch target for cbz, the other successor
+// for cbnz).  On that edge the tested register is known to be zero, so a
+// "COPY %WZR/%XZR" into it is a no-op and can be erased.  Scanning stops as
+// soon as the tested register (or any alias) may be modified.
+//
+// Returns true if at least one instruction was removed.
+bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
+  // Check if the current basic block has a single predecessor.
+  if (MBB->pred_size() != 1)
+    return false;
+
+  MachineBasicBlock *PredMBB = *MBB->pred_begin();
+  MachineBasicBlock::iterator CompBr = PredMBB->getLastNonDebugInstr();
+  if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
+    return false;
+
+  unsigned LastOpc = CompBr->getOpcode();
+  // Check if the current basic block is the target block to which the cbz/cbnz
+  // instruction jumps when its Wt/Xt is zero.
+  if (LastOpc == AArch64::CBZW || LastOpc == AArch64::CBZX) {
+    if (MBB != CompBr->getOperand(1).getMBB())
+      return false;
+  } else if (LastOpc == AArch64::CBNZW || LastOpc == AArch64::CBNZX) {
+    // For cbnz the zero path is the successor that is NOT the branch target.
+    if (MBB == CompBr->getOperand(1).getMBB())
+      return false;
+  } else {
+    return false;
+  }
+
+  unsigned TargetReg = CompBr->getOperand(0).getReg();
+  if (!TargetReg)
+    return false;
+  assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) &&
+         "Expect physical register");
+
+  // Remember all registers aliasing with TargetReg.
+  SmallSetVector<unsigned, 8> TargetRegs;
+  for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI)
+    TargetRegs.insert(*AI);
+
+  bool Changed = false;
+  // Remove redundant Copy instructions unless TargetReg is modified.
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
+    MachineInstr *MI = &*I;
+    // Advance before a possible eraseFromParent() invalidates MI's iterator.
+    ++I;
+    if (MI->isCopy() && MI->getOperand(0).isReg() &&
+        MI->getOperand(1).isReg()) {
+
+      unsigned DefReg = MI->getOperand(0).getReg();
+      unsigned SrcReg = MI->getOperand(1).getReg();
+
+      // The copy is redundant only when it zeroes TargetReg itself or a
+      // sub-register of it (e.g. W0 when the branch tested X0).  Zeroing a
+      // wider register than was tested (X0 after a cbz on W0) is NOT
+      // redundant, since nothing is known about the upper 32 bits; such
+      // copies fail this check and are kept.
+      if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
+          !MRI->isReserved(DefReg) &&
+          (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
+
+        // DefReg now stays live across the PredMBB->MBB edge instead of
+        // being re-defined here, so drop stale kill flags and make it a
+        // live-in of MBB.
+        CompBr->clearRegisterKills(DefReg, TRI);
+        if (MBB->isLiveIn(DefReg))
+          // Clear any kills of TargetReg between CompBr and MI.
+          for (MachineInstr &MMI :
+               make_range(MBB->begin()->getIterator(), MI->getIterator()))
+            MMI.clearRegisterKills(DefReg, TRI);
+        else
+          MBB->addLiveIn(DefReg);
+
+        DEBUG(dbgs() << "Remove redundant Copy : ");
+        DEBUG((MI)->print(dbgs()));
+
+        MI->eraseFromParent();
+        Changed = true;
+        NumCopiesRemoved++;
+        continue;
+      }
+    }
+
+    // Any clobber of TargetReg (or an alias) ends the region in which the
+    // zero value is known; give up from that point on.
+    for (const MachineOperand &MO : MI->operands()) {
+      // FIXME: It is possible to use the register mask to check if all
+      // registers in TargetRegs are not clobbered. For now, we treat it like
+      // a basic block boundary.
+      if (MO.isRegMask())
+        return Changed;
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+
+      if (!Reg)
+        continue;
+
+      assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+             "Expect physical register");
+
+      // Stop if the TargetReg is modified.
+      if (MO.isDef() && TargetRegs.count(Reg))
+        return Changed;
+    }
+  }
+  return Changed;
+}
+
+// Entry point: cache the register info objects for this function, then
+// attempt the copy elimination on every basic block independently.
+// Returns true if any block was changed.
+bool AArch64RedundantCopyElimination::runOnMachineFunction(
+    MachineFunction &MF) {
+  TRI = MF.getSubtarget().getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF)
+    Changed |= optimizeCopy(&MBB);
+  return Changed;
+}
+
+// Factory called from the AArch64 pass pipeline (addPostRegAlloc) to create
+// an instance of this pass.
+FunctionPass *llvm::createAArch64RedundantCopyEliminationPass() {
+  return new AArch64RedundantCopyElimination();
+}

Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=261004&r1=261003&r2=261004&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Tue Feb 16 14:02:39 2016
@@ -61,6 +61,11 @@ EnableDeadRegisterElimination("aarch64-d
                               cl::init(true));
 
 static cl::opt<bool>
+EnableRedundantCopyElimination("aarch64-redundant-copy-elim",
+              cl::desc("Enable the redundant copy elimination pass"),
+              cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
 EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair"
                    " optimization pass"), cl::init(true), cl::Hidden);
 
@@ -316,6 +321,10 @@ void AArch64PassConfig::addPreRegAlloc()
 }
 
 void AArch64PassConfig::addPostRegAlloc() {
+  // Remove redundant copy instructions.
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
+    addPass(createAArch64RedundantCopyEliminationPass());
+
   // Change dead register definitions to refer to the zero register.
   if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
     addPass(createAArch64DeadRegisterDefinitions());

Modified: llvm/trunk/lib/Target/AArch64/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/CMakeLists.txt?rev=261004&r1=261003&r2=261004&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AArch64/CMakeLists.txt Tue Feb 16 14:02:39 2016
@@ -30,6 +30,7 @@ add_llvm_target(AArch64CodeGen
   AArch64A53Fix835769.cpp
   AArch64FrameLowering.cpp
   AArch64ConditionOptimizer.cpp
+  AArch64RedundantCopyElimination.cpp
   AArch64ISelDAGToDAG.cpp
   AArch64ISelLowering.cpp
   AArch64InstrInfo.cpp

Added: llvm/trunk/test/CodeGen/AArch64/machine-copy-remove.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/machine-copy-remove.ll?rev=261004&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/machine-copy-remove.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/machine-copy-remove.ll Tue Feb 16 14:02:39 2016
@@ -0,0 +1,75 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s
+
+; 64-bit test / 64-bit zero: after "cbz x" the whole X register is known to
+; be zero in the target block, so the "mov x, xzr" materializing the phi's
+; zero must be eliminated (CHECK-NOT).
+; CHECK-LABEL: f_XX
+; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK-NOT: mov x[[REG]], xzr
+define i64 @f_XX(i64 %n, i64* nocapture readonly %P) {
+entry:
+  %tobool = icmp eq i64 %n, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i64, i64* %P
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ]
+  ret i64 %a.0
+}
+
+; 32-bit test / 32-bit zero: same as f_XX but for W registers; the
+; "mov w, wzr" in the cbz target block must be removed (CHECK-NOT).
+; CHECK-LABEL: f_WW
+; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK-NOT: mov w[[REG]], wzr
+define i32 @f_WW(i32 %n, i32* nocapture readonly %P) {
+entry:
+  %tobool = icmp eq i32 %n, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i32, i32* %P
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %a.0 = phi i32 [ %0, %if.then ], [ 0, %entry ]
+  ret i32 %a.0
+}
+
+; 64-bit test / 32-bit zero: "cbz x" implies the low 32 bits (the W
+; sub-register) are zero too, so the "mov w, wzr" must still be removed
+; (CHECK-NOT).
+; CHECK-LABEL: f_XW
+; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK-NOT: mov w[[REG]], wzr
+define i32 @f_XW(i64 %n, i32* nocapture readonly %P) {
+entry:
+  %tobool = icmp eq i64 %n, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i32, i32* %P
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %a.0 = phi i32 [ %0, %if.then ], [ 0, %entry ]
+  ret i32 %a.0
+}
+
+; 32-bit test / 64-bit zero: negative test — "cbz w" says nothing about the
+; upper 32 bits of the X register, so "mov x, xzr" must be KEPT (plain CHECK).
+; CHECK-LABEL: f_WX
+; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
+; CHECK: [[BB]]:
+; CHECK: mov x[[REG]], xzr
+; Do not remove the mov in this case because we do not know if the upper bits
+; of the X register are zero.
+define i64 @f_WX(i32 %n, i64* nocapture readonly %P) {
+entry:
+  %tobool = icmp eq i32 %n, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i64, i64* %P
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ]
+  ret i64 %a.0
+}




More information about the llvm-commits mailing list