[llvm] r287076 - AArch64: Use DeadRegisterDefinitionsPass before regalloc.

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 15 19:38:27 PST 2016


Author: matze
Date: Tue Nov 15 21:38:27 2016
New Revision: 287076

URL: http://llvm.org/viewvc/llvm-project?rev=287076&view=rev
Log:
AArch64: Use DeadRegisterDefinitionsPass before regalloc.

Doing this before register allocation reduces register pressure as we do
not even have to allocate a register for those dead definitions.

Differential Revision: https://reviews.llvm.org/D26111

Added:
    llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
    llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp

Modified: llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp?rev=287076&r1=287075&r2=287076&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp Tue Nov 15 21:38:27 2016
@@ -17,9 +17,11 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "aarch64-dead-defs"
@@ -32,8 +34,9 @@ namespace {
 class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
 private:
   const TargetRegisterInfo *TRI;
+  const MachineRegisterInfo *MRI;
+  const TargetInstrInfo *TII;
   bool Changed;
-  bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI);
   void processMachineBasicBlock(MachineBasicBlock &MBB);
 public:
   static char ID; // Pass identification, replacement for typeid.
@@ -44,11 +47,6 @@ public:
 
   bool runOnMachineFunction(MachineFunction &F) override;
 
-  MachineFunctionProperties getRequiredProperties() const override {
-    return MachineFunctionProperties().set(
-        MachineFunctionProperties::Property::NoVRegs);
-  }
-
   StringRef getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -62,15 +60,6 @@ char AArch64DeadRegisterDefinitions::ID
 INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs",
                 AARCH64_DEAD_REG_DEF_NAME, false, false)
 
-bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg(
-    unsigned Reg, const MachineInstr &MI) {
-  for (const MachineOperand &MO : MI.implicit_operands())
-    if (MO.isReg() && MO.isDef())
-      if (TRI->regsOverlap(Reg, MO.getReg()))
-        return true;
-  return false;
-}
-
 static bool usesFrameIndex(const MachineInstr &MI) {
   for (const MachineOperand &MO : MI.uses())
     if (MO.isFI())
@@ -80,6 +69,7 @@ static bool usesFrameIndex(const Machine
 
 void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
     MachineBasicBlock &MBB) {
+  const MachineFunction &MF = *MBB.getParent();
   for (MachineInstr &MI : MBB) {
     if (usesFrameIndex(MI)) {
       // We need to skip this instruction because while it appears to have a
@@ -97,7 +87,13 @@ void AArch64DeadRegisterDefinitions::pro
     const MCInstrDesc &Desc = MI.getDesc();
     for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
       MachineOperand &MO = MI.getOperand(I);
-      if (!MO.isReg() || !MO.isDead() || !MO.isDef())
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+      // We should not have any relevant physreg defs that are replaceable by
+      // zero before register allocation. So we just check for dead vreg defs.
+      unsigned Reg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+          (!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
         continue;
       assert(!MO.isImplicit() && "Unexpected implicit def!");
       DEBUG(dbgs() << "  Dead def operand #" << I << " in:\n    ";
@@ -107,28 +103,22 @@ void AArch64DeadRegisterDefinitions::pro
         DEBUG(dbgs() << "    Ignoring, def is tied operand.\n");
         continue;
       }
-      // Don't change the register if there's an implicit def of a subreg or
-      // superreg.
-      if (implicitlyDefinesOverlappingReg(MO.getReg(), MI)) {
-        DEBUG(dbgs() << "    Ignoring, implicitly defines overlap reg.\n");
-        continue;
-      }
-      // Make sure the instruction take a register class that contains
-      // the zero register and replace it if so.
+      const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
       unsigned NewReg;
-      switch (Desc.OpInfo[I].RegClass) {
-      default:
+      if (RC == nullptr) {
         DEBUG(dbgs() << "    Ignoring, register is not a GPR.\n");
         continue;
-      case AArch64::GPR32RegClassID:
+      } else if (RC->contains(AArch64::WZR))
         NewReg = AArch64::WZR;
-        break;
-      case AArch64::GPR64RegClassID:
+      else if (RC->contains(AArch64::XZR))
         NewReg = AArch64::XZR;
-        break;
+      else {
+        DEBUG(dbgs() << "    Ignoring, register is not a GPR.\n");
+        continue;
       }
       DEBUG(dbgs() << "    Replacing with zero register. New:\n      ");
       MO.setReg(NewReg);
+      MO.setIsDead();
       DEBUG(MI.print(dbgs()));
       ++NumDeadDefsReplaced;
       Changed = true;
@@ -145,6 +135,8 @@ bool AArch64DeadRegisterDefinitions::run
     return false;
 
   TRI = MF.getSubtarget().getRegisterInfo();
+  TII = MF.getSubtarget().getInstrInfo();
+  MRI = &MF.getRegInfo();
   DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
   Changed = false;
   for (auto &MBB : MF)

Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=287076&r1=287075&r2=287076&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Tue Nov 15 21:38:27 2016
@@ -434,6 +434,10 @@ bool AArch64PassConfig::addILPOpts() {
 }
 
 void AArch64PassConfig::addPreRegAlloc() {
+  // Change dead register definitions to refer to the zero register.
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
+    addPass(createAArch64DeadRegisterDefinitions());
+
   // Use AdvSIMD scalar instructions whenever profitable.
   if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
     addPass(createAArch64AdvSIMDScalar());
@@ -448,9 +452,6 @@ void AArch64PassConfig::addPostRegAlloc(
   if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
     addPass(createAArch64RedundantCopyEliminationPass());
 
-  // Change dead register definitions to refer to the zero register.
-  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
-    addPass(createAArch64DeadRegisterDefinitions());
   if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
     // Improve performance for some FP/SIMD code for A57.
     addPass(createAArch64A57FPLoadBalancing());

Added: llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll?rev=287076&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll Tue Nov 15 21:38:27 2016
@@ -0,0 +1,91 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "aarch64--"
+
+declare void @begin()
+declare void @end()
+
+; Test that we use the zero register before regalloc and do not unnecessarily
+; clobber a register with the SUBS (cmp) instruction.
+; CHECK-LABEL: func:
+define void @func(i64* %addr) {
+  ; We should not see any spills or reloads between begin and end
+  ; CHECK: bl begin
+  ; CHECK-NOT: str{{.*}}sp
+  ; CHECK-NOT: Folded Spill
+  ; CHECK-NOT: ldr{{.*}}sp
+  ; CHECK-NOT: Folded Reload
+  call void @begin()
+  %v0 = load volatile i64, i64* %addr  
+  %v1 = load volatile i64, i64* %addr  
+  %v2 = load volatile i64, i64* %addr  
+  %v3 = load volatile i64, i64* %addr  
+  %v4 = load volatile i64, i64* %addr  
+  %v5 = load volatile i64, i64* %addr  
+  %v6 = load volatile i64, i64* %addr  
+  %v7 = load volatile i64, i64* %addr  
+  %v8 = load volatile i64, i64* %addr  
+  %v9 = load volatile i64, i64* %addr  
+  %v10 = load volatile i64, i64* %addr  
+  %v11 = load volatile i64, i64* %addr  
+  %v12 = load volatile i64, i64* %addr  
+  %v13 = load volatile i64, i64* %addr  
+  %v14 = load volatile i64, i64* %addr  
+  %v15 = load volatile i64, i64* %addr  
+  %v16 = load volatile i64, i64* %addr  
+  %v17 = load volatile i64, i64* %addr  
+  %v18 = load volatile i64, i64* %addr  
+  %v19 = load volatile i64, i64* %addr  
+  %v20 = load volatile i64, i64* %addr
+  %v21 = load volatile i64, i64* %addr
+  %v22 = load volatile i64, i64* %addr
+  %v23 = load volatile i64, i64* %addr
+  %v24 = load volatile i64, i64* %addr
+  %v25 = load volatile i64, i64* %addr
+  %v26 = load volatile i64, i64* %addr
+  %v27 = load volatile i64, i64* %addr
+  %v28 = load volatile i64, i64* %addr
+  %v29 = load volatile i64, i64* %addr
+
+  %c = icmp eq i64 %v0, %v1
+  br i1 %c, label %if.then, label %if.end
+
+if.then:
+  store volatile i64 %v2, i64* %addr
+  br label %if.end
+
+if.end:
+  store volatile i64 %v0, i64* %addr
+  store volatile i64 %v1, i64* %addr
+  store volatile i64 %v2, i64* %addr
+  store volatile i64 %v3, i64* %addr
+  store volatile i64 %v4, i64* %addr
+  store volatile i64 %v5, i64* %addr
+  store volatile i64 %v6, i64* %addr
+  store volatile i64 %v7, i64* %addr
+  store volatile i64 %v8, i64* %addr
+  store volatile i64 %v9, i64* %addr
+  store volatile i64 %v10, i64* %addr
+  store volatile i64 %v11, i64* %addr
+  store volatile i64 %v12, i64* %addr
+  store volatile i64 %v13, i64* %addr
+  store volatile i64 %v14, i64* %addr
+  store volatile i64 %v15, i64* %addr
+  store volatile i64 %v16, i64* %addr
+  store volatile i64 %v17, i64* %addr
+  store volatile i64 %v18, i64* %addr
+  store volatile i64 %v19, i64* %addr
+  store volatile i64 %v20, i64* %addr
+  store volatile i64 %v21, i64* %addr
+  store volatile i64 %v22, i64* %addr
+  store volatile i64 %v23, i64* %addr
+  store volatile i64 %v24, i64* %addr
+  store volatile i64 %v25, i64* %addr
+  store volatile i64 %v26, i64* %addr
+  store volatile i64 %v27, i64* %addr
+  store volatile i64 %v28, i64* %addr
+  store volatile i64 %v29, i64* %addr
+  ; CHECK: bl end
+  call void @end()
+
+  ret void
+}




More information about the llvm-commits mailing list