[llvm] r205481 - [CodeGen] Teach the peephole optimizer to remember (and exploit) all folding

Lang Hames lhames at gmail.com
Thu Apr 3 08:19:52 PDT 2014


No worries, and yes - this was probably the cause. :)


On Thu, Apr 3, 2014 at 1:56 AM, Alexey Samsonov <samsonov at google.com> wrote:

> Sorry, I didn't notice that it had been fixed already.
>
>
> On Thu, Apr 3, 2014 at 12:29 PM, Alexey Samsonov <samsonov at google.com> wrote:
>
>> Hi Lang,
>>
>> Could this commit be the culprit behind the Clang crashes we see on the
>> asan/msan bootstrap buildbot?
>>
>> http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/2896/steps/check-all%20stage3%2Fmsan/logs/stdio
>>
>>
>> On Thu, Apr 3, 2014 at 2:59 AM, Lang Hames <lhames at gmail.com> wrote:
>>
>>> Author: lhames
>>> Date: Wed Apr  2 17:59:58 2014
>>> New Revision: 205481
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=205481&view=rev
>>> Log:
>>> [CodeGen] Teach the peephole optimizer to remember (and exploit) all
>>> folding opportunities in the current basic block, rather than just the
>>> last one seen.
>>>
>>> <rdar://problem/16478629>
>>>
>>>
>>> Added:
>>>     llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll
>>> Modified:
>>>     llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
>>>
>>> Modified: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?rev=205481&r1=205480&r2=205481&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp (original)
>>> +++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp Wed Apr  2 17:59:58 2014
>>> @@ -133,7 +133,8 @@ namespace {
>>>      bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
>>>                         SmallSet<unsigned, 4> &ImmDefRegs,
>>>                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
>>> -    bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
>>> +    bool isLoadFoldable(MachineInstr *MI,
>>> +                        SmallSet<unsigned, 16>
>>> &FoldAsLoadDefCandidates);
>>>    };
>>>  }
>>>
>>> @@ -489,8 +490,9 @@ bool PeepholeOptimizer::optimizeCopyOrBi
>>>  /// isLoadFoldable - Check whether MI is a candidate for folding into a
>>> later
>>>  /// instruction. We only fold loads to virtual registers and the virtual
>>>  /// register defined has a single use.
>>> -bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
>>> -                                       unsigned &FoldAsLoadDefReg) {
>>> +bool PeepholeOptimizer::isLoadFoldable(
>>> +                              MachineInstr *MI,
>>> +                              SmallSet<unsigned, 16>
>>> &FoldAsLoadDefCandidates) {
>>>    if (!MI->canFoldAsLoad() || !MI->mayLoad())
>>>      return false;
>>>    const MCInstrDesc &MCID = MI->getDesc();
>>> @@ -504,7 +506,7 @@ bool PeepholeOptimizer::isLoadFoldable(M
>>>    if (!MI->getOperand(0).getSubReg() &&
>>>        TargetRegisterInfo::isVirtualRegister(Reg) &&
>>>        MRI->hasOneNonDBGUse(Reg)) {
>>> -    FoldAsLoadDefReg = Reg;
>>> +    FoldAsLoadDefCandidates.insert(Reg);
>>>      return true;
>>>    }
>>>    return false;
>>> @@ -570,18 +572,14 @@ bool PeepholeOptimizer::runOnMachineFunc
>>>
>>>    bool Changed = false;
>>>
>>> -  SmallPtrSet<MachineInstr*, 8> LocalMIs;
>>> -  SmallSet<unsigned, 4> ImmDefRegs;
>>> -  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
>>> -  unsigned FoldAsLoadDefReg;
>>>    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;
>>> ++I) {
>>>      MachineBasicBlock *MBB = &*I;
>>>
>>>      bool SeenMoveImm = false;
>>> -    LocalMIs.clear();
>>> -    ImmDefRegs.clear();
>>> -    ImmDefMIs.clear();
>>> -    FoldAsLoadDefReg = 0;
>>> +    SmallPtrSet<MachineInstr*, 8> LocalMIs;
>>> +    SmallSet<unsigned, 4> ImmDefRegs;
>>> +    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
>>> +    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
>>>
>>>      for (MachineBasicBlock::iterator
>>>             MII = I->begin(), MIE = I->end(); MII != MIE; ) {
>>> @@ -595,15 +593,15 @@ bool PeepholeOptimizer::runOnMachineFunc
>>>            continue;
>>>
>>>        // If there exists an instruction which belongs to the following
>>> -      // categories, we will discard the load candidate.
>>> +      // categories, we will discard the load candidates.
>>>        if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
>>>            MI->isKill() || MI->isInlineAsm() ||
>>>            MI->hasUnmodeledSideEffects()) {
>>> -        FoldAsLoadDefReg = 0;
>>> +        FoldAsLoadDefCandidates.clear();
>>>          continue;
>>>        }
>>>        if (MI->mayStore() || MI->isCall())
>>> -        FoldAsLoadDefReg = 0;
>>> +        FoldAsLoadDefCandidates.clear();
>>>
>>>        if (((MI->isBitcast() || MI->isCopy()) &&
>>> optimizeCopyOrBitcast(MI)) ||
>>>            (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
>>> @@ -630,30 +628,41 @@ bool PeepholeOptimizer::runOnMachineFunc
>>>        // Check whether MI is a load candidate for folding into a later
>>>        // instruction. If MI is not a candidate, check whether we can
>>> fold an
>>>        // earlier load into MI.
>>> -      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
>>> +      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
>>> +          !FoldAsLoadDefCandidates.empty()) {
>>>          // We need to fold load after optimizeCmpInstr, since
>>> optimizeCmpInstr
>>>          // can enable folding by converting SUB to CMP.
>>>          // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it
>>> and we
>>>          // need it for markUsesInDebugValueAsUndef().
>>> -        unsigned FoldedReg = FoldAsLoadDefReg;
>>> -        MachineInstr *DefMI = 0;
>>> -        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
>>> -                                                      FoldAsLoadDefReg,
>>> DefMI);
>>> -        if (FoldMI) {
>>> -          // Update LocalMIs since we replaced MI with FoldMI and
>>> deleted DefMI.
>>> -          DEBUG(dbgs() << "Replacing: " << *MI);
>>> -          DEBUG(dbgs() << "     With: " << *FoldMI);
>>> -          LocalMIs.erase(MI);
>>> -          LocalMIs.erase(DefMI);
>>> -          LocalMIs.insert(FoldMI);
>>> -          MI->eraseFromParent();
>>> -          DefMI->eraseFromParent();
>>> -          MRI->markUsesInDebugValueAsUndef(FoldedReg);
>>> -          ++NumLoadFold;
>>> -
>>> -          // MI is replaced with FoldMI.
>>> -          Changed = true;
>>> -          continue;
>>> +        const MCInstrDesc &MIDesc = MI->getDesc();
>>> +        for (unsigned i = MIDesc.getNumDefs(); i !=
>>> MIDesc.getNumOperands();
>>> +             ++i) {
>>> +          const MachineOperand &MOp = MI->getOperand(i);
>>> +          if (!MOp.isReg())
>>> +            continue;
>>> +          unsigned TryFoldReg = MOp.getReg();
>>> +          if (FoldAsLoadDefCandidates.count(TryFoldReg)) {
>>> +            MachineInstr *DefMI = 0;
>>> +            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
>>> TryFoldReg,
>>> +                                                          DefMI);
>>> +            if (FoldMI) {
>>> +              // Update LocalMIs since we replaced MI with FoldMI and
>>> deleted
>>> +              // DefMI.
>>> +              DEBUG(dbgs() << "Replacing: " << *MI);
>>> +              DEBUG(dbgs() << "     With: " << *FoldMI);
>>> +              LocalMIs.erase(MI);
>>> +              LocalMIs.erase(DefMI);
>>> +              LocalMIs.insert(FoldMI);
>>> +              MI->eraseFromParent();
>>> +              DefMI->eraseFromParent();
>>> +              MRI->markUsesInDebugValueAsUndef(TryFoldReg);
>>> +              FoldAsLoadDefCandidates.erase(TryFoldReg);
>>> +              ++NumLoadFold;
>>> +              // MI is replaced with FoldMI.
>>> +              Changed = true;
>>> +              break;
>>> +            }
>>> +          }
>>>          }
>>>        }
>>>      }
>>>
>>> Added: llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll?rev=205481&view=auto
>>>
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll (added)
>>> +++ llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll Wed Apr  2
>>> 17:59:58 2014
>>> @@ -0,0 +1,29 @@
>>> +; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s
>>> +;
>>> +; Test multiple peephole-time folds in a single basic block.
>>> +; <rdar://problem/16478629>
>>> +
>>> +define <8 x float> @test_peephole_multi_fold(<8 x float>* %p1, <8 x
>>> float>* %p2) {
>>> +entry:
>>> +  br label %loopbody
>>> +
>>> +loopbody:
>>> +; CHECK: _test_peephole_multi_fold:
>>> +; CHECK: vfmadd231ps (%rdi),
>>> +; CHECK: vfmadd231ps (%rsi),
>>> +  %vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [
>>> zeroinitializer, %entry ]
>>> +  %vsum2 = phi <8 x float> [ %vsum2.next, %loopbody ], [
>>> zeroinitializer, %entry ]
>>> +  %m1 = load <8 x float>* %p1, align 1
>>> +  %m2 = load <8 x float>* %p2, align 1
>>> +  %vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x
>>> float> %m1, <8 x float> zeroinitializer, <8 x float> %vsum1)
>>> +  %vsum2.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x
>>> float> %m2, <8 x float> zeroinitializer, <8 x float> %vsum2)
>>> +  %vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0
>>> +  %c = fcmp oeq float %vsum1.next.1, 0.0
>>> +  br i1 %c, label %loopbody, label %loopexit
>>> +
>>> +loopexit:
>>> +  %r = fadd <8 x float> %vsum1.next, %vsum2.next
>>> +  ret <8 x float> %r
>>> +}
>>> +
>>> +declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x
>>> float>, <8 x float>)
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>>
>>
>>
>>
>> --
>> Alexey Samsonov, MSK
>>
>
>
>
> --
> Alexey Samsonov, MSK
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140403/f0e55944/attachment.html>


More information about the llvm-commits mailing list