[llvm] r220381 - [AArch64] Cleanup A57PBQPConstraints
Arnaud A. de Grandmaison
arnaud.degrandmaison at arm.com
Wed Oct 22 05:40:20 PDT 2014
Author: aadg
Date: Wed Oct 22 07:40:20 2014
New Revision: 220381
URL: http://llvm.org/viewvc/llvm-project?rev=220381&view=rev
Log:
[AArch64] Cleanup A57PBQPConstraints
And add a long-awaited testcase.
Added:
llvm/trunk/test/CodeGen/AArch64/PBQP-chain.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.h
llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
Modified: llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp?rev=220381&r1=220380&r2=220381&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp Wed Oct 22 07:40:20 2014
@@ -156,19 +156,17 @@ bool haveSameParity(unsigned reg1, unsig
}
-bool A57PBQPConstraints::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
+bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
unsigned Ra) {
if (Rd == Ra)
return false;
- const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
LiveIntervals &LIs = G.getMetadata().LIS;
- if (TRI.isPhysicalRegister(Rd) || TRI.isPhysicalRegister(Ra)) {
- DEBUG(dbgs() << "Rd is a physical reg:" << TRI.isPhysicalRegister(Rd)
+ if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) {
+ DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd)
<< '\n');
- DEBUG(dbgs() << "Ra is a physical reg:" << TRI.isPhysicalRegister(Ra)
+ DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra)
<< '\n');
return false;
}
@@ -196,7 +194,7 @@ bool A57PBQPConstraints::addIntraChainCo
unsigned pRd = (*vRdAllowed)[i];
for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
unsigned pRa = (*vRaAllowed)[j];
- if (livesOverlap && TRI.regsOverlap(pRd, pRa))
+ if (livesOverlap && TRI->regsOverlap(pRd, pRa))
costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
else
costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0;
@@ -242,23 +240,20 @@ bool A57PBQPConstraints::addIntraChainCo
return true;
}
-void A57PBQPConstraints::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
+void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
unsigned Ra) {
- const TargetRegisterInfo &TRI =
- *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
- (void)TRI;
LiveIntervals &LIs = G.getMetadata().LIS;
// Do some Chain management
if (Chains.count(Ra)) {
if (Rd != Ra) {
- DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, &TRI) << " to "
- << PrintReg(Rd, &TRI) << '\n';);
+ DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to "
+ << PrintReg(Rd, TRI) << '\n';);
Chains.remove(Ra);
Chains.insert(Rd);
}
} else {
- DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, &TRI)
+ DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI)
<< '\n';);
Chains.insert(Rd);
}
@@ -322,24 +317,41 @@ void A57PBQPConstraints::addInterChainCo
}
}
-void A57PBQPConstraints::apply(PBQPRAGraph &G) {
- MachineFunction &MF = G.getMetadata().MF;
+static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
+ const MachineInstr &MI) {
+ LiveInterval LI = LIs.getInterval(reg);
+ SlotIndex SI = LIs.getInstructionIndex(&MI);
+ return LI.expiredAt(SI);
+}
+
+void A57ChainingConstraint::apply(PBQPRAGraph &G) {
+ const MachineFunction &MF = G.getMetadata().MF;
+ LiveIntervals &LIs = G.getMetadata().LIS;
- const TargetRegisterInfo &TRI =
- *MF.getTarget().getSubtargetImpl()->getRegisterInfo();
- (void)TRI;
+ TRI = MF.getTarget().getSubtargetImpl()->getRegisterInfo();
DEBUG(MF.dump());
- for (MachineFunction::const_iterator mbbItr = MF.begin(), mbbEnd = MF.end();
- mbbItr != mbbEnd; ++mbbItr) {
- const MachineBasicBlock *MBB = &*mbbItr;
+ for (const auto &MBB: MF) {
Chains.clear(); // FIXME: really needed ? Could not work at MF level ?
- for (MachineBasicBlock::const_iterator miItr = MBB->begin(),
- miEnd = MBB->end();
- miItr != miEnd; ++miItr) {
- const MachineInstr *MI = &*miItr;
- switch (MI->getOpcode()) {
+ for (const auto &MI: MBB) {
+
+ // Forget Chains which have expired
+ for (auto r : Chains) {
+ SmallVector<unsigned, 8> toDel;
+ if(regJustKilledBefore(LIs, r, MI)) {
+ DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at ";
+ MI.print(dbgs()););
+ toDel.push_back(r);
+ }
+
+ while (!toDel.empty()) {
+ Chains.remove(toDel.back());
+ toDel.pop_back();
+ }
+ }
+
+ switch (MI.getOpcode()) {
case AArch64::FMSUBSrrr:
case AArch64::FMADDSrrr:
case AArch64::FNMSUBSrrr:
@@ -348,8 +360,8 @@ void A57PBQPConstraints::apply(PBQPRAGra
case AArch64::FMADDDrrr:
case AArch64::FNMSUBDrrr:
case AArch64::FNMADDDrrr: {
- unsigned Rd = MI->getOperand(0).getReg();
- unsigned Ra = MI->getOperand(3).getReg();
+ unsigned Rd = MI.getOperand(0).getReg();
+ unsigned Ra = MI.getOperand(3).getReg();
if (addIntraChainConstraint(G, Rd, Ra))
addInterChainConstraint(G, Rd, Ra);
@@ -358,26 +370,13 @@ void A57PBQPConstraints::apply(PBQPRAGra
case AArch64::FMLAv2f32:
case AArch64::FMLSv2f32: {
- unsigned Rd = MI->getOperand(0).getReg();
+ unsigned Rd = MI.getOperand(0).getReg();
addInterChainConstraint(G, Rd, Rd);
break;
}
default:
- // Forget Chains which have been killed
- for (auto r : Chains) {
- SmallVector<unsigned, 8> toDel;
- if (MI->killsRegister(r)) {
- DEBUG(dbgs() << "Killing chain " << PrintReg(r, &TRI) << " at ";
- MI->print(dbgs()););
- toDel.push_back(r);
- }
-
- while (!toDel.empty()) {
- Chains.remove(toDel.back());
- toDel.pop_back();
- }
- }
+ break;
}
}
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.h?rev=220381&r1=220380&r2=220381&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64PBQPRegAlloc.h Wed Oct 22 07:40:20 2014
@@ -15,14 +15,15 @@
namespace llvm {
-class A57PBQPConstraints : public PBQPRAConstraint {
+/// Add the accumulator chaining constraint to a PBQP graph
+class A57ChainingConstraint : public PBQPRAConstraint {
public:
-
// Add A57 specific constraints to the PBQP graph.
void apply(PBQPRAGraph &G) override;
private:
SmallSetVector<unsigned, 32> Chains;
+ const TargetRegisterInfo *TRI;
// Add the accumulator chaining constraint, inside the chain, i.e. so that
// parity(Rd) == parity(Ra).
@@ -32,7 +33,6 @@ private:
// Add constraints between existing chains
void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
};
-
}
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp?rev=220381&r1=220380&r2=220381&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp Wed Oct 22 07:40:20 2014
@@ -137,5 +137,8 @@ bool AArch64Subtarget::enableEarlyIfConv
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
- return llvm::make_unique<A57PBQPConstraints>();
+ if (!isCortexA57())
+ return nullptr;
+
+ return llvm::make_unique<A57ChainingConstraint>();
}
Added: llvm/trunk/test/CodeGen/AArch64/PBQP-chain.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/PBQP-chain.ll?rev=220381&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/PBQP-chain.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/PBQP-chain.ll Wed Oct 22 07:40:20 2014
@@ -0,0 +1,104 @@
+; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
+; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
+;
+; Test PBQP is able to fulfill the accumulator chaining constraint.
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; CHECK-LABEL: fir
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) {
+entry:
+ %0 = load double* %c, align 8
+ %1 = load double* %x, align 8
+ %mul = fmul fast double %1, %0
+ %2 = load double* %y, align 8
+ %mul7 = fmul fast double %2, %0
+ %arrayidx.1 = getelementptr inbounds double* %c, i64 1
+ %3 = load double* %arrayidx.1, align 8
+ %arrayidx2.1 = getelementptr inbounds double* %x, i64 1
+ %4 = load double* %arrayidx2.1, align 8
+ %mul.1 = fmul fast double %4, %3
+ %add.1 = fadd fast double %mul.1, %mul
+ %arrayidx6.1 = getelementptr inbounds double* %y, i64 1
+ %5 = load double* %arrayidx6.1, align 8
+ %mul7.1 = fmul fast double %5, %3
+ %add8.1 = fadd fast double %mul7.1, %mul7
+ %arrayidx.2 = getelementptr inbounds double* %c, i64 2
+ %6 = load double* %arrayidx.2, align 8
+ %arrayidx2.2 = getelementptr inbounds double* %x, i64 2
+ %7 = load double* %arrayidx2.2, align 8
+ %mul.2 = fmul fast double %7, %6
+ %add.2 = fadd fast double %mul.2, %add.1
+ %arrayidx6.2 = getelementptr inbounds double* %y, i64 2
+ %8 = load double* %arrayidx6.2, align 8
+ %mul7.2 = fmul fast double %8, %6
+ %add8.2 = fadd fast double %mul7.2, %add8.1
+ %arrayidx.3 = getelementptr inbounds double* %c, i64 3
+ %9 = load double* %arrayidx.3, align 8
+ %arrayidx2.3 = getelementptr inbounds double* %x, i64 3
+ %10 = load double* %arrayidx2.3, align 8
+ %mul.3 = fmul fast double %10, %9
+ %add.3 = fadd fast double %mul.3, %add.2
+ %arrayidx6.3 = getelementptr inbounds double* %y, i64 3
+ %11 = load double* %arrayidx6.3, align 8
+ %mul7.3 = fmul fast double %11, %9
+ %add8.3 = fadd fast double %mul7.3, %add8.2
+ %arrayidx.4 = getelementptr inbounds double* %c, i64 4
+ %12 = load double* %arrayidx.4, align 8
+ %arrayidx2.4 = getelementptr inbounds double* %x, i64 4
+ %13 = load double* %arrayidx2.4, align 8
+ %mul.4 = fmul fast double %13, %12
+ %add.4 = fadd fast double %mul.4, %add.3
+ %arrayidx6.4 = getelementptr inbounds double* %y, i64 4
+ %14 = load double* %arrayidx6.4, align 8
+ %mul7.4 = fmul fast double %14, %12
+ %add8.4 = fadd fast double %mul7.4, %add8.3
+ %arrayidx.5 = getelementptr inbounds double* %c, i64 5
+ %15 = load double* %arrayidx.5, align 8
+ %arrayidx2.5 = getelementptr inbounds double* %x, i64 5
+ %16 = load double* %arrayidx2.5, align 8
+ %mul.5 = fmul fast double %16, %15
+ %add.5 = fadd fast double %mul.5, %add.4
+ %arrayidx6.5 = getelementptr inbounds double* %y, i64 5
+ %17 = load double* %arrayidx6.5, align 8
+ %mul7.5 = fmul fast double %17, %15
+ %add8.5 = fadd fast double %mul7.5, %add8.4
+ %arrayidx.6 = getelementptr inbounds double* %c, i64 6
+ %18 = load double* %arrayidx.6, align 8
+ %arrayidx2.6 = getelementptr inbounds double* %x, i64 6
+ %19 = load double* %arrayidx2.6, align 8
+ %mul.6 = fmul fast double %19, %18
+ %add.6 = fadd fast double %mul.6, %add.5
+ %arrayidx6.6 = getelementptr inbounds double* %y, i64 6
+ %20 = load double* %arrayidx6.6, align 8
+ %mul7.6 = fmul fast double %20, %18
+ %add8.6 = fadd fast double %mul7.6, %add8.5
+ %arrayidx.7 = getelementptr inbounds double* %c, i64 7
+ %21 = load double* %arrayidx.7, align 8
+ %arrayidx2.7 = getelementptr inbounds double* %x, i64 7
+ %22 = load double* %arrayidx2.7, align 8
+ %mul.7 = fmul fast double %22, %21
+ %add.7 = fadd fast double %mul.7, %add.6
+ %arrayidx6.7 = getelementptr inbounds double* %y, i64 7
+ %23 = load double* %arrayidx6.7, align 8
+ %mul7.7 = fmul fast double %23, %21
+ %add8.7 = fadd fast double %mul7.7, %add8.6
+ store double %add.7, double* %rx, align 8
+ store double %add8.7, double* %ry, align 8
+ ret void
+}
+
More information about the llvm-commits mailing list