[llvm] r205481 - [CodeGen] Teach the peephole optimizer to remember (and exploit) all folding

Alexey Samsonov samsonov at google.com
Thu Apr 3 01:29:26 PDT 2014


Hi Lang,

Could this commit be the culprit behind the Clang crashes we are seeing on
the asan/msan bootstrap buildbot?
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/2896/steps/check-all%20stage3%2Fmsan/logs/stdio


On Thu, Apr 3, 2014 at 2:59 AM, Lang Hames <lhames at gmail.com> wrote:

> Author: lhames
> Date: Wed Apr  2 17:59:58 2014
> New Revision: 205481
>
> URL: http://llvm.org/viewvc/llvm-project?rev=205481&view=rev
> Log:
> [CodeGen] Teach the peephole optimizer to remember (and exploit) all
> folding
> opportunities in the current basic block, rather than just the last one
> seen.
>
> <rdar://problem/16478629>
>
>
> Added:
>     llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll
> Modified:
>     llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
>
> Modified: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?rev=205481&r1=205480&r2=205481&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp (original)
> +++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp Wed Apr  2 17:59:58 2014
> @@ -133,7 +133,8 @@ namespace {
>      bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
>                         SmallSet<unsigned, 4> &ImmDefRegs,
>                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
> -    bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
> +    bool isLoadFoldable(MachineInstr *MI,
> +                        SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
>    };
>  }
>
> @@ -489,8 +490,9 @@ bool PeepholeOptimizer::optimizeCopyOrBi
>  /// isLoadFoldable - Check whether MI is a candidate for folding into a
> later
>  /// instruction. We only fold loads to virtual registers and the virtual
>  /// register defined has a single use.
> -bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
> -                                       unsigned &FoldAsLoadDefReg) {
> +bool PeepholeOptimizer::isLoadFoldable(
> +                              MachineInstr *MI,
> +                              SmallSet<unsigned, 16>
> &FoldAsLoadDefCandidates) {
>    if (!MI->canFoldAsLoad() || !MI->mayLoad())
>      return false;
>    const MCInstrDesc &MCID = MI->getDesc();
> @@ -504,7 +506,7 @@ bool PeepholeOptimizer::isLoadFoldable(M
>    if (!MI->getOperand(0).getSubReg() &&
>        TargetRegisterInfo::isVirtualRegister(Reg) &&
>        MRI->hasOneNonDBGUse(Reg)) {
> -    FoldAsLoadDefReg = Reg;
> +    FoldAsLoadDefCandidates.insert(Reg);
>      return true;
>    }
>    return false;
> @@ -570,18 +572,14 @@ bool PeepholeOptimizer::runOnMachineFunc
>
>    bool Changed = false;
>
> -  SmallPtrSet<MachineInstr*, 8> LocalMIs;
> -  SmallSet<unsigned, 4> ImmDefRegs;
> -  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
> -  unsigned FoldAsLoadDefReg;
>    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;
> ++I) {
>      MachineBasicBlock *MBB = &*I;
>
>      bool SeenMoveImm = false;
> -    LocalMIs.clear();
> -    ImmDefRegs.clear();
> -    ImmDefMIs.clear();
> -    FoldAsLoadDefReg = 0;
> +    SmallPtrSet<MachineInstr*, 8> LocalMIs;
> +    SmallSet<unsigned, 4> ImmDefRegs;
> +    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
> +    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
>
>      for (MachineBasicBlock::iterator
>             MII = I->begin(), MIE = I->end(); MII != MIE; ) {
> @@ -595,15 +593,15 @@ bool PeepholeOptimizer::runOnMachineFunc
>            continue;
>
>        // If there exists an instruction which belongs to the following
> -      // categories, we will discard the load candidate.
> +      // categories, we will discard the load candidates.
>        if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
>            MI->isKill() || MI->isInlineAsm() ||
>            MI->hasUnmodeledSideEffects()) {
> -        FoldAsLoadDefReg = 0;
> +        FoldAsLoadDefCandidates.clear();
>          continue;
>        }
>        if (MI->mayStore() || MI->isCall())
> -        FoldAsLoadDefReg = 0;
> +        FoldAsLoadDefCandidates.clear();
>
>        if (((MI->isBitcast() || MI->isCopy()) &&
> optimizeCopyOrBitcast(MI)) ||
>            (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
> @@ -630,30 +628,41 @@ bool PeepholeOptimizer::runOnMachineFunc
>        // Check whether MI is a load candidate for folding into a later
>        // instruction. If MI is not a candidate, check whether we can fold
> an
>        // earlier load into MI.
> -      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
> +      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
> +          !FoldAsLoadDefCandidates.empty()) {
>          // We need to fold load after optimizeCmpInstr, since
> optimizeCmpInstr
>          // can enable folding by converting SUB to CMP.
>          // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it
> and we
>          // need it for markUsesInDebugValueAsUndef().
> -        unsigned FoldedReg = FoldAsLoadDefReg;
> -        MachineInstr *DefMI = 0;
> -        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
> -                                                      FoldAsLoadDefReg,
> DefMI);
> -        if (FoldMI) {
> -          // Update LocalMIs since we replaced MI with FoldMI and deleted
> DefMI.
> -          DEBUG(dbgs() << "Replacing: " << *MI);
> -          DEBUG(dbgs() << "     With: " << *FoldMI);
> -          LocalMIs.erase(MI);
> -          LocalMIs.erase(DefMI);
> -          LocalMIs.insert(FoldMI);
> -          MI->eraseFromParent();
> -          DefMI->eraseFromParent();
> -          MRI->markUsesInDebugValueAsUndef(FoldedReg);
> -          ++NumLoadFold;
> -
> -          // MI is replaced with FoldMI.
> -          Changed = true;
> -          continue;
> +        const MCInstrDesc &MIDesc = MI->getDesc();
> +        for (unsigned i = MIDesc.getNumDefs(); i !=
> MIDesc.getNumOperands();
> +             ++i) {
> +          const MachineOperand &MOp = MI->getOperand(i);
> +          if (!MOp.isReg())
> +            continue;
> +          unsigned TryFoldReg = MOp.getReg();
> +          if (FoldAsLoadDefCandidates.count(TryFoldReg)) {
> +            MachineInstr *DefMI = 0;
> +            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
> TryFoldReg,
> +                                                          DefMI);
> +            if (FoldMI) {
> +              // Update LocalMIs since we replaced MI with FoldMI and
> deleted
> +              // DefMI.
> +              DEBUG(dbgs() << "Replacing: " << *MI);
> +              DEBUG(dbgs() << "     With: " << *FoldMI);
> +              LocalMIs.erase(MI);
> +              LocalMIs.erase(DefMI);
> +              LocalMIs.insert(FoldMI);
> +              MI->eraseFromParent();
> +              DefMI->eraseFromParent();
> +              MRI->markUsesInDebugValueAsUndef(TryFoldReg);
> +              FoldAsLoadDefCandidates.erase(TryFoldReg);
> +              ++NumLoadFold;
> +              // MI is replaced with FoldMI.
> +              Changed = true;
> +              break;
> +            }
> +          }
>          }
>        }
>      }
>
> Added: llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll?rev=205481&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll Wed Apr  2
> 17:59:58 2014
> @@ -0,0 +1,29 @@
> +; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s
> +;
> +; Test multiple peephole-time folds in a single basic block.
> +; <rdar://problem/16478629>
> +
> +define <8 x float> @test_peephole_multi_fold(<8 x float>* %p1, <8 x
> float>* %p2) {
> +entry:
> +  br label %loopbody
> +
> +loopbody:
> +; CHECK: _test_peephole_multi_fold:
> +; CHECK: vfmadd231ps (%rdi),
> +; CHECK: vfmadd231ps (%rsi),
> +  %vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [ zeroinitializer,
> %entry ]
> +  %vsum2 = phi <8 x float> [ %vsum2.next, %loopbody ], [ zeroinitializer,
> %entry ]
> +  %m1 = load <8 x float>* %p1, align 1
> +  %m2 = load <8 x float>* %p2, align 1
> +  %vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x
> float> %m1, <8 x float> zeroinitializer, <8 x float> %vsum1)
> +  %vsum2.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x
> float> %m2, <8 x float> zeroinitializer, <8 x float> %vsum2)
> +  %vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0
> +  %c = fcmp oeq float %vsum1.next.1, 0.0
> +  br i1 %c, label %loopbody, label %loopexit
> +
> +loopexit:
> +  %r = fadd <8 x float> %vsum1.next, %vsum2.next
> +  ret <8 x float> %r
> +}
> +
> +declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>,
> <8 x float>)
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>



-- 
Alexey Samsonov, MSK
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140403/8f1f3b31/attachment.html>


More information about the llvm-commits mailing list