[llvm] r205481 - [CodeGen] Teach the peephole optimizer to remember (and exploit) all folding
Alexey Samsonov
samsonov at google.com
Thu Apr 3 01:56:03 PDT 2014
Sorry, I didn't notice that it had been fixed already.
On Thu, Apr 3, 2014 at 12:29 PM, Alexey Samsonov <samsonov at google.com> wrote:
> Hi Lang,
>
> Can this commit be the culprit of Clang crashes we see on asan/msan
> bootstrap buildbot?
>
> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/2896/steps/check-all%20stage3%2Fmsan/logs/stdio
>
>
> On Thu, Apr 3, 2014 at 2:59 AM, Lang Hames <lhames at gmail.com> wrote:
>
>> Author: lhames
>> Date: Wed Apr 2 17:59:58 2014
>> New Revision: 205481
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=205481&view=rev
>> Log:
>> [CodeGen] Teach the peephole optimizer to remember (and exploit) all
>> folding
>> opportunities in the current basic block, rather than just the last one
>> seen.
>>
>> <rdar://problem/16478629>
>>
>>
>> Added:
>> llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll
>> Modified:
>> llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
>>
>> Modified: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?rev=205481&r1=205480&r2=205481&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp Wed Apr 2 17:59:58 2014
>> @@ -133,7 +133,8 @@ namespace {
>> bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
>> SmallSet<unsigned, 4> &ImmDefRegs,
>> DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
>> - bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
>> + bool isLoadFoldable(MachineInstr *MI,
>> + SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
>> };
>> }
>>
>> @@ -489,8 +490,9 @@ bool PeepholeOptimizer::optimizeCopyOrBi
>> /// isLoadFoldable - Check whether MI is a candidate for folding into a
>> later
>> /// instruction. We only fold loads to virtual registers and the virtual
>> /// register defined has a single use.
>> -bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
>> - unsigned &FoldAsLoadDefReg) {
>> +bool PeepholeOptimizer::isLoadFoldable(
>> + MachineInstr *MI,
>> + SmallSet<unsigned, 16>
>> &FoldAsLoadDefCandidates) {
>> if (!MI->canFoldAsLoad() || !MI->mayLoad())
>> return false;
>> const MCInstrDesc &MCID = MI->getDesc();
>> @@ -504,7 +506,7 @@ bool PeepholeOptimizer::isLoadFoldable(M
>> if (!MI->getOperand(0).getSubReg() &&
>> TargetRegisterInfo::isVirtualRegister(Reg) &&
>> MRI->hasOneNonDBGUse(Reg)) {
>> - FoldAsLoadDefReg = Reg;
>> + FoldAsLoadDefCandidates.insert(Reg);
>> return true;
>> }
>> return false;
>> @@ -570,18 +572,14 @@ bool PeepholeOptimizer::runOnMachineFunc
>>
>> bool Changed = false;
>>
>> - SmallPtrSet<MachineInstr*, 8> LocalMIs;
>> - SmallSet<unsigned, 4> ImmDefRegs;
>> - DenseMap<unsigned, MachineInstr*> ImmDefMIs;
>> - unsigned FoldAsLoadDefReg;
>> for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;
>> ++I) {
>> MachineBasicBlock *MBB = &*I;
>>
>> bool SeenMoveImm = false;
>> - LocalMIs.clear();
>> - ImmDefRegs.clear();
>> - ImmDefMIs.clear();
>> - FoldAsLoadDefReg = 0;
>> + SmallPtrSet<MachineInstr*, 8> LocalMIs;
>> + SmallSet<unsigned, 4> ImmDefRegs;
>> + DenseMap<unsigned, MachineInstr*> ImmDefMIs;
>> + SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
>>
>> for (MachineBasicBlock::iterator
>> MII = I->begin(), MIE = I->end(); MII != MIE; ) {
>> @@ -595,15 +593,15 @@ bool PeepholeOptimizer::runOnMachineFunc
>> continue;
>>
>> // If there exists an instruction which belongs to the following
>> - // categories, we will discard the load candidate.
>> + // categories, we will discard the load candidates.
>> if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
>> MI->isKill() || MI->isInlineAsm() ||
>> MI->hasUnmodeledSideEffects()) {
>> - FoldAsLoadDefReg = 0;
>> + FoldAsLoadDefCandidates.clear();
>> continue;
>> }
>> if (MI->mayStore() || MI->isCall())
>> - FoldAsLoadDefReg = 0;
>> + FoldAsLoadDefCandidates.clear();
>>
>> if (((MI->isBitcast() || MI->isCopy()) &&
>> optimizeCopyOrBitcast(MI)) ||
>> (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
>> @@ -630,30 +628,41 @@ bool PeepholeOptimizer::runOnMachineFunc
>> // Check whether MI is a load candidate for folding into a later
>> // instruction. If MI is not a candidate, check whether we can
>> fold an
>> // earlier load into MI.
>> - if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
>> + if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
>> + !FoldAsLoadDefCandidates.empty()) {
>> // We need to fold load after optimizeCmpInstr, since
>> optimizeCmpInstr
>> // can enable folding by converting SUB to CMP.
>> // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it
>> and we
>> // need it for markUsesInDebugValueAsUndef().
>> - unsigned FoldedReg = FoldAsLoadDefReg;
>> - MachineInstr *DefMI = 0;
>> - MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
>> - FoldAsLoadDefReg,
>> DefMI);
>> - if (FoldMI) {
>> - // Update LocalMIs since we replaced MI with FoldMI and
>> deleted DefMI.
>> - DEBUG(dbgs() << "Replacing: " << *MI);
>> - DEBUG(dbgs() << " With: " << *FoldMI);
>> - LocalMIs.erase(MI);
>> - LocalMIs.erase(DefMI);
>> - LocalMIs.insert(FoldMI);
>> - MI->eraseFromParent();
>> - DefMI->eraseFromParent();
>> - MRI->markUsesInDebugValueAsUndef(FoldedReg);
>> - ++NumLoadFold;
>> -
>> - // MI is replaced with FoldMI.
>> - Changed = true;
>> - continue;
>> + const MCInstrDesc &MIDesc = MI->getDesc();
>> + for (unsigned i = MIDesc.getNumDefs(); i !=
>> MIDesc.getNumOperands();
>> + ++i) {
>> + const MachineOperand &MOp = MI->getOperand(i);
>> + if (!MOp.isReg())
>> + continue;
>> + unsigned TryFoldReg = MOp.getReg();
>> + if (FoldAsLoadDefCandidates.count(TryFoldReg)) {
>> + MachineInstr *DefMI = 0;
>> + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
>> TryFoldReg,
>> + DefMI);
>> + if (FoldMI) {
>> + // Update LocalMIs since we replaced MI with FoldMI and
>> deleted
>> + // DefMI.
>> + DEBUG(dbgs() << "Replacing: " << *MI);
>> + DEBUG(dbgs() << " With: " << *FoldMI);
>> + LocalMIs.erase(MI);
>> + LocalMIs.erase(DefMI);
>> + LocalMIs.insert(FoldMI);
>> + MI->eraseFromParent();
>> + DefMI->eraseFromParent();
>> + MRI->markUsesInDebugValueAsUndef(TryFoldReg);
>> + FoldAsLoadDefCandidates.erase(TryFoldReg);
>> + ++NumLoadFold;
>> + // MI is replaced with FoldMI.
>> + Changed = true;
>> + break;
>> + }
>> + }
>> }
>> }
>> }
>>
>> Added: llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll?rev=205481&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll Wed Apr 2
>> 17:59:58 2014
>> @@ -0,0 +1,29 @@
>> +; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s
>> +;
>> +; Test multiple peephole-time folds in a single basic block.
>> +; <rdar://problem/16478629>
>> +
>> +define <8 x float> @test_peephole_multi_fold(<8 x float>* %p1, <8 x
>> float>* %p2) {
>> +entry:
>> + br label %loopbody
>> +
>> +loopbody:
>> +; CHECK: _test_peephole_multi_fold:
>> +; CHECK: vfmadd231ps (%rdi),
>> +; CHECK: vfmadd231ps (%rsi),
>> + %vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [
>> zeroinitializer, %entry ]
>> + %vsum2 = phi <8 x float> [ %vsum2.next, %loopbody ], [
>> zeroinitializer, %entry ]
>> + %m1 = load <8 x float>* %p1, align 1
>> + %m2 = load <8 x float>* %p2, align 1
>> + %vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x
>> float> %m1, <8 x float> zeroinitializer, <8 x float> %vsum1)
>> + %vsum2.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x
>> float> %m2, <8 x float> zeroinitializer, <8 x float> %vsum2)
>> + %vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0
>> + %c = fcmp oeq float %vsum1.next.1, 0.0
>> + br i1 %c, label %loopbody, label %loopexit
>> +
>> +loopexit:
>> + %r = fadd <8 x float> %vsum1.next, %vsum2.next
>> + ret <8 x float> %r
>> +}
>> +
>> +declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x
>> float>, <8 x float>)
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
>
>
> --
> Alexey Samsonov, MSK
>
--
Alexey Samsonov, MSK
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140403/9132cdea/attachment.html>
More information about the llvm-commits
mailing list