[llvm] r287076 - AArch64: Use DeadRegisterDefinitionsPass before regalloc.
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 19:38:27 PST 2016
Author: matze
Date: Tue Nov 15 21:38:27 2016
New Revision: 287076
URL: http://llvm.org/viewvc/llvm-project?rev=287076&view=rev
Log:
AArch64: Use DeadRegisterDefinitionsPass before regalloc.
Doing this before register allocation reduces register pressure as we do
not even have to allocate a register for those dead definitions.
Differential Revision: https://reviews.llvm.org/D26111
Added:
llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
Modified: llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp?rev=287076&r1=287075&r2=287076&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp Tue Nov 15 21:38:27 2016
@@ -17,9 +17,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-dead-defs"
@@ -32,8 +34,9 @@ namespace {
class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
bool Changed;
- bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI);
void processMachineBasicBlock(MachineBasicBlock &MBB);
public:
static char ID; // Pass identification, replacement for typeid.
@@ -44,11 +47,6 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
-
StringRef getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -62,15 +60,6 @@ char AArch64DeadRegisterDefinitions::ID
INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs",
AARCH64_DEAD_REG_DEF_NAME, false, false)
-bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg(
- unsigned Reg, const MachineInstr &MI) {
- for (const MachineOperand &MO : MI.implicit_operands())
- if (MO.isReg() && MO.isDef())
- if (TRI->regsOverlap(Reg, MO.getReg()))
- return true;
- return false;
-}
-
static bool usesFrameIndex(const MachineInstr &MI) {
for (const MachineOperand &MO : MI.uses())
if (MO.isFI())
@@ -80,6 +69,7 @@ static bool usesFrameIndex(const Machine
void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
for (MachineInstr &MI : MBB) {
if (usesFrameIndex(MI)) {
// We need to skip this instruction because while it appears to have a
@@ -97,7 +87,13 @@ void AArch64DeadRegisterDefinitions::pro
const MCInstrDesc &Desc = MI.getDesc();
for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDead() || !MO.isDef())
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+      // We should not have any relevant physreg defs that are replaceable by
+      // zero before register allocation. So we just check for dead vreg defs.
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ (!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
continue;
assert(!MO.isImplicit() && "Unexpected implicit def!");
DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
@@ -107,28 +103,22 @@ void AArch64DeadRegisterDefinitions::pro
DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
continue;
}
- // Don't change the register if there's an implicit def of a subreg or
- // superreg.
- if (implicitlyDefinesOverlappingReg(MO.getReg(), MI)) {
- DEBUG(dbgs() << " Ignoring, implicitly defines overlap reg.\n");
- continue;
- }
- // Make sure the instruction take a register class that contains
- // the zero register and replace it if so.
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
unsigned NewReg;
- switch (Desc.OpInfo[I].RegClass) {
- default:
+ if (RC == nullptr) {
DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
- case AArch64::GPR32RegClassID:
+ } else if (RC->contains(AArch64::WZR))
NewReg = AArch64::WZR;
- break;
- case AArch64::GPR64RegClassID:
+ else if (RC->contains(AArch64::XZR))
NewReg = AArch64::XZR;
- break;
+ else {
+ DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
+ continue;
}
DEBUG(dbgs() << " Replacing with zero register. New:\n ");
MO.setReg(NewReg);
+ MO.setIsDead();
DEBUG(MI.print(dbgs()));
++NumDeadDefsReplaced;
Changed = true;
@@ -145,6 +135,8 @@ bool AArch64DeadRegisterDefinitions::run
return false;
TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
Changed = false;
for (auto &MBB : MF)
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=287076&r1=287075&r2=287076&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Tue Nov 15 21:38:27 2016
@@ -434,6 +434,10 @@ bool AArch64PassConfig::addILPOpts() {
}
void AArch64PassConfig::addPreRegAlloc() {
+ // Change dead register definitions to refer to the zero register.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
+ addPass(createAArch64DeadRegisterDefinitions());
+
// Use AdvSIMD scalar instructions whenever profitable.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
@@ -448,9 +452,6 @@ void AArch64PassConfig::addPostRegAlloc(
if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
addPass(createAArch64RedundantCopyEliminationPass());
- // Change dead register definitions to refer to the zero register.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
- addPass(createAArch64DeadRegisterDefinitions());
if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
Added: llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll?rev=287076&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-zeroreg.ll Tue Nov 15 21:38:27 2016
@@ -0,0 +1,91 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "aarch64--"
+
+declare void @begin()
+declare void @end()
+
+; Test that we use the zero register before regalloc and do not unnecessarily
+; clobber a register with the SUBS (cmp) instruction.
+; CHECK-LABEL: func:
+define void @func(i64* %addr) {
+ ; We should not see any spills or reloads between begin and end
+ ; CHECK: bl begin
+ ; CHECK-NOT: str{{.*}}sp
+ ; CHECK-NOT: Folded Spill
+ ; CHECK-NOT: ldr{{.*}}sp
+ ; CHECK-NOT: Folded Reload
+ call void @begin()
+ %v0 = load volatile i64, i64* %addr
+ %v1 = load volatile i64, i64* %addr
+ %v2 = load volatile i64, i64* %addr
+ %v3 = load volatile i64, i64* %addr
+ %v4 = load volatile i64, i64* %addr
+ %v5 = load volatile i64, i64* %addr
+ %v6 = load volatile i64, i64* %addr
+ %v7 = load volatile i64, i64* %addr
+ %v8 = load volatile i64, i64* %addr
+ %v9 = load volatile i64, i64* %addr
+ %v10 = load volatile i64, i64* %addr
+ %v11 = load volatile i64, i64* %addr
+ %v12 = load volatile i64, i64* %addr
+ %v13 = load volatile i64, i64* %addr
+ %v14 = load volatile i64, i64* %addr
+ %v15 = load volatile i64, i64* %addr
+ %v16 = load volatile i64, i64* %addr
+ %v17 = load volatile i64, i64* %addr
+ %v18 = load volatile i64, i64* %addr
+ %v19 = load volatile i64, i64* %addr
+ %v20 = load volatile i64, i64* %addr
+ %v21 = load volatile i64, i64* %addr
+ %v22 = load volatile i64, i64* %addr
+ %v23 = load volatile i64, i64* %addr
+ %v24 = load volatile i64, i64* %addr
+ %v25 = load volatile i64, i64* %addr
+ %v26 = load volatile i64, i64* %addr
+ %v27 = load volatile i64, i64* %addr
+ %v28 = load volatile i64, i64* %addr
+ %v29 = load volatile i64, i64* %addr
+
+ %c = icmp eq i64 %v0, %v1
+ br i1 %c, label %if.then, label %if.end
+
+if.then:
+ store volatile i64 %v2, i64* %addr
+ br label %if.end
+
+if.end:
+ store volatile i64 %v0, i64* %addr
+ store volatile i64 %v1, i64* %addr
+ store volatile i64 %v2, i64* %addr
+ store volatile i64 %v3, i64* %addr
+ store volatile i64 %v4, i64* %addr
+ store volatile i64 %v5, i64* %addr
+ store volatile i64 %v6, i64* %addr
+ store volatile i64 %v7, i64* %addr
+ store volatile i64 %v8, i64* %addr
+ store volatile i64 %v9, i64* %addr
+ store volatile i64 %v10, i64* %addr
+ store volatile i64 %v11, i64* %addr
+ store volatile i64 %v12, i64* %addr
+ store volatile i64 %v13, i64* %addr
+ store volatile i64 %v14, i64* %addr
+ store volatile i64 %v15, i64* %addr
+ store volatile i64 %v16, i64* %addr
+ store volatile i64 %v17, i64* %addr
+ store volatile i64 %v18, i64* %addr
+ store volatile i64 %v19, i64* %addr
+ store volatile i64 %v20, i64* %addr
+ store volatile i64 %v21, i64* %addr
+ store volatile i64 %v22, i64* %addr
+ store volatile i64 %v23, i64* %addr
+ store volatile i64 %v24, i64* %addr
+ store volatile i64 %v25, i64* %addr
+ store volatile i64 %v26, i64* %addr
+ store volatile i64 %v27, i64* %addr
+ store volatile i64 %v28, i64* %addr
+ store volatile i64 %v29, i64* %addr
+ ; CHECK: bl end
+ call void @end()
+
+ ret void
+}
More information about the llvm-commits
mailing list