[llvm] r205481 - [CodeGen] Teach the peephole optimizer to remember (and exploit) all folding opportunities in the current basic block, rather than just the last one seen.
Lang Hames
lhames at gmail.com
Wed Apr 2 15:59:58 PDT 2014
Author: lhames
Date: Wed Apr 2 17:59:58 2014
New Revision: 205481
URL: http://llvm.org/viewvc/llvm-project?rev=205481&view=rev
Log:
[CodeGen] Teach the peephole optimizer to remember (and exploit) all folding
opportunities in the current basic block, rather than just the last one seen.
<rdar://problem/16478629>
Added:
llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll
Modified:
llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
Modified: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?rev=205481&r1=205480&r2=205481&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp (original)
+++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp Wed Apr 2 17:59:58 2014
@@ -133,7 +133,8 @@ namespace {
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
- bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
+ bool isLoadFoldable(MachineInstr *MI,
+ SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
};
}
@@ -489,8 +490,9 @@ bool PeepholeOptimizer::optimizeCopyOrBi
/// isLoadFoldable - Check whether MI is a candidate for folding into a later
/// instruction. We only fold loads to virtual registers and the virtual
/// register defined has a single use.
-bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
- unsigned &FoldAsLoadDefReg) {
+bool PeepholeOptimizer::isLoadFoldable(
+ MachineInstr *MI,
+ SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
if (!MI->canFoldAsLoad() || !MI->mayLoad())
return false;
const MCInstrDesc &MCID = MI->getDesc();
@@ -504,7 +506,7 @@ bool PeepholeOptimizer::isLoadFoldable(M
if (!MI->getOperand(0).getSubReg() &&
TargetRegisterInfo::isVirtualRegister(Reg) &&
MRI->hasOneNonDBGUse(Reg)) {
- FoldAsLoadDefReg = Reg;
+ FoldAsLoadDefCandidates.insert(Reg);
return true;
}
return false;
@@ -570,18 +572,14 @@ bool PeepholeOptimizer::runOnMachineFunc
bool Changed = false;
- SmallPtrSet<MachineInstr*, 8> LocalMIs;
- SmallSet<unsigned, 4> ImmDefRegs;
- DenseMap<unsigned, MachineInstr*> ImmDefMIs;
- unsigned FoldAsLoadDefReg;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
bool SeenMoveImm = false;
- LocalMIs.clear();
- ImmDefRegs.clear();
- ImmDefMIs.clear();
- FoldAsLoadDefReg = 0;
+ SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
for (MachineBasicBlock::iterator
MII = I->begin(), MIE = I->end(); MII != MIE; ) {
@@ -595,15 +593,15 @@ bool PeepholeOptimizer::runOnMachineFunc
continue;
// If there exists an instruction which belongs to the following
- // categories, we will discard the load candidate.
+ // categories, we will discard the load candidates.
if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() ||
MI->hasUnmodeledSideEffects()) {
- FoldAsLoadDefReg = 0;
+ FoldAsLoadDefCandidates.clear();
continue;
}
if (MI->mayStore() || MI->isCall())
- FoldAsLoadDefReg = 0;
+ FoldAsLoadDefCandidates.clear();
if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
(MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
@@ -630,30 +628,41 @@ bool PeepholeOptimizer::runOnMachineFunc
// Check whether MI is a load candidate for folding into a later
// instruction. If MI is not a candidate, check whether we can fold an
// earlier load into MI.
- if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+ if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
+ !FoldAsLoadDefCandidates.empty()) {
// We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
// can enable folding by converting SUB to CMP.
// Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and we
// need it for markUsesInDebugValueAsUndef().
- unsigned FoldedReg = FoldAsLoadDefReg;
- MachineInstr *DefMI = 0;
- MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
- FoldAsLoadDefReg, DefMI);
- if (FoldMI) {
- // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
- DEBUG(dbgs() << "Replacing: " << *MI);
- DEBUG(dbgs() << " With: " << *FoldMI);
- LocalMIs.erase(MI);
- LocalMIs.erase(DefMI);
- LocalMIs.insert(FoldMI);
- MI->eraseFromParent();
- DefMI->eraseFromParent();
- MRI->markUsesInDebugValueAsUndef(FoldedReg);
- ++NumLoadFold;
-
- // MI is replaced with FoldMI.
- Changed = true;
- continue;
+ const MCInstrDesc &MIDesc = MI->getDesc();
+ for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
+ ++i) {
+ const MachineOperand &MOp = MI->getOperand(i);
+ if (!MOp.isReg())
+ continue;
+ unsigned TryFoldReg = MOp.getReg();
+ if (FoldAsLoadDefCandidates.count(TryFoldReg)) {
+ MachineInstr *DefMI = 0;
+ MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, TryFoldReg,
+ DefMI);
+ if (FoldMI) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted
+ // DefMI.
+ DEBUG(dbgs() << "Replacing: " << *MI);
+ DEBUG(dbgs() << " With: " << *FoldMI);
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ MRI->markUsesInDebugValueAsUndef(TryFoldReg);
+ FoldAsLoadDefCandidates.erase(TryFoldReg);
+ ++NumLoadFold;
+ // MI is replaced with FoldMI.
+ Changed = true;
+ break;
+ }
+ }
}
}
}
Added: llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll?rev=205481&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll (added)
+++ llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll Wed Apr 2 17:59:58 2014
@@ -0,0 +1,29 @@
+; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s
+;
+; Test multiple peephole-time folds in a single basic block.
+; <rdar://problem/16478629>
+
+define <8 x float> @test_peephole_multi_fold(<8 x float>* %p1, <8 x float>* %p2) {
+entry:
+ br label %loopbody
+
+loopbody:
+; CHECK: _test_peephole_multi_fold:
+; CHECK: vfmadd231ps (%rdi),
+; CHECK: vfmadd231ps (%rsi),
+ %vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [ zeroinitializer, %entry ]
+ %vsum2 = phi <8 x float> [ %vsum2.next, %loopbody ], [ zeroinitializer, %entry ]
+ %m1 = load <8 x float>* %p1, align 1
+ %m2 = load <8 x float>* %p2, align 1
+ %vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m1, <8 x float> zeroinitializer, <8 x float> %vsum1)
+ %vsum2.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m2, <8 x float> zeroinitializer, <8 x float> %vsum2)
+ %vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0
+ %c = fcmp oeq float %vsum1.next.1, 0.0
+ br i1 %c, label %loopbody, label %loopexit
+
+loopexit:
+ %r = fadd <8 x float> %vsum1.next, %vsum2.next
+ ret <8 x float> %r
+}
+
+declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
More information about the llvm-commits mailing list