<div dir="ltr">No worries, and yes - this was probably the cause. :)</div><div class="gmail_extra"><br><br><div class="gmail_quote">On Thu, Apr 3, 2014 at 1:56 AM, Alexey Samsonov <span dir="ltr">&lt;<a href="mailto:samsonov@google.com" target="_blank">samsonov@google.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">Sorry, I didn't notice that it had been fixed already.</div><div class="gmail_extra"><div><div class="h5">
<br><br><div class="gmail_quote">On Thu, Apr 3, 2014 at 12:29 PM, Alexey Samsonov <span dir="ltr">&lt;<a href="mailto:samsonov@google.com" target="_blank">samsonov@google.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">Hi Lang,<div><br></div><div>Can this commit be the culprit of Clang crashes we see on asan/msan bootstrap buildbot?</div>

<div><a href="http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/2896/steps/check-all%20stage3%2Fmsan/logs/stdio" target="_blank">http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-bootstrap/builds/2896/steps/check-all%20stage3%2Fmsan/logs/stdio</a></div>


</div><div class="gmail_extra"><div><div><br><br><div class="gmail_quote">On Thu, Apr 3, 2014 at 2:59 AM, Lang Hames <span dir="ltr">&lt;<a href="mailto:lhames@gmail.com" target="_blank">lhames@gmail.com</a>&gt;</span> wrote:<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Author: lhames<br>
Date: Wed Apr  2 17:59:58 2014<br>
New Revision: 205481<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=205481&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=205481&view=rev</a><br>
Log:<br>
[CodeGen] Teach the peephole optimizer to remember (and exploit) all folding<br>
opportunities in the current basic block, rather than just the last one seen.<br>
<br>
&lt;rdar://problem/16478629&gt;<br>
<br>
<br>
Added:<br>
    llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll<br>
Modified:<br>
    llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp<br>
<br>
Modified: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?rev=205481&r1=205480&r2=205481&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?rev=205481&r1=205480&r2=205481&view=diff</a><br>



==============================================================================<br>
--- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp (original)<br>
+++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp Wed Apr  2 17:59:58 2014<br>
@@ -133,7 +133,8 @@ namespace {<br>
     bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,<br>
                        SmallSet<unsigned, 4> &ImmDefRegs,<br>
                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);<br>
-    bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);<br>
+    bool isLoadFoldable(MachineInstr *MI,<br>
+                        SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);<br>
   };<br>
 }<br>
<br>
@@ -489,8 +490,9 @@ bool PeepholeOptimizer::optimizeCopyOrBi<br>
 /// isLoadFoldable - Check whether MI is a candidate for folding into a later<br>
 /// instruction. We only fold loads to virtual registers and the virtual<br>
 /// register defined has a single use.<br>
-bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,<br>
-                                       unsigned &FoldAsLoadDefReg) {<br>
+bool PeepholeOptimizer::isLoadFoldable(<br>
+                              MachineInstr *MI,<br>
+                              SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {<br>
   if (!MI->canFoldAsLoad() || !MI->mayLoad())<br>
     return false;<br>
   const MCInstrDesc &MCID = MI->getDesc();<br>
@@ -504,7 +506,7 @@ bool PeepholeOptimizer::isLoadFoldable(M<br>
   if (!MI->getOperand(0).getSubReg() &&<br>
       TargetRegisterInfo::isVirtualRegister(Reg) &&<br>
       MRI->hasOneNonDBGUse(Reg)) {<br>
-    FoldAsLoadDefReg = Reg;<br>
+    FoldAsLoadDefCandidates.insert(Reg);<br>
     return true;<br>
   }<br>
   return false;<br>
@@ -570,18 +572,14 @@ bool PeepholeOptimizer::runOnMachineFunc<br>
<br>
   bool Changed = false;<br>
<br>
-  SmallPtrSet<MachineInstr*, 8> LocalMIs;<br>
-  SmallSet<unsigned, 4> ImmDefRegs;<br>
-  DenseMap<unsigned, MachineInstr*> ImmDefMIs;<br>
-  unsigned FoldAsLoadDefReg;<br>
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {<br>
     MachineBasicBlock *MBB = &*I;<br>
<br>
     bool SeenMoveImm = false;<br>
-    LocalMIs.clear();<br>
-    ImmDefRegs.clear();<br>
-    ImmDefMIs.clear();<br>
-    FoldAsLoadDefReg = 0;<br>
+    SmallPtrSet<MachineInstr*, 8> LocalMIs;<br>
+    SmallSet<unsigned, 4> ImmDefRegs;<br>
+    DenseMap<unsigned, MachineInstr*> ImmDefMIs;<br>
+    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;<br>
<br>
     for (MachineBasicBlock::iterator<br>
            MII = I->begin(), MIE = I->end(); MII != MIE; ) {<br>
@@ -595,15 +593,15 @@ bool PeepholeOptimizer::runOnMachineFunc<br>
           continue;<br>
<br>
       // If there exists an instruction which belongs to the following<br>
-      // categories, we will discard the load candidate.<br>
+      // categories, we will discard the load candidates.<br>
       if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||<br>
           MI->isKill() || MI->isInlineAsm() ||<br>
           MI->hasUnmodeledSideEffects()) {<br>
-        FoldAsLoadDefReg = 0;<br>
+        FoldAsLoadDefCandidates.clear();<br>
         continue;<br>
       }<br>
       if (MI->mayStore() || MI->isCall())<br>
-        FoldAsLoadDefReg = 0;<br>
+        FoldAsLoadDefCandidates.clear();<br>
<br>
       if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||<br>
           (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||<br>
@@ -630,30 +628,41 @@ bool PeepholeOptimizer::runOnMachineFunc<br>
       // Check whether MI is a load candidate for folding into a later<br>
       // instruction. If MI is not a candidate, check whether we can fold an<br>
       // earlier load into MI.<br>
-      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {<br>
+      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&<br>
+          !FoldAsLoadDefCandidates.empty()) {<br>
         // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr<br>
         // can enable folding by converting SUB to CMP.<br>
         // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and we<br>
         // need it for markUsesInDebugValueAsUndef().<br>
-        unsigned FoldedReg = FoldAsLoadDefReg;<br>
-        MachineInstr *DefMI = 0;<br>
-        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,<br>
-                                                      FoldAsLoadDefReg, DefMI);<br>
-        if (FoldMI) {<br>
-          // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.<br>
-          DEBUG(dbgs() << "Replacing: " << *MI);<br>
-          DEBUG(dbgs() << "     With: " << *FoldMI);<br>
-          LocalMIs.erase(MI);<br>
-          LocalMIs.erase(DefMI);<br>
-          LocalMIs.insert(FoldMI);<br>
-          MI->eraseFromParent();<br>
-          DefMI->eraseFromParent();<br>
-          MRI->markUsesInDebugValueAsUndef(FoldedReg);<br>
-          ++NumLoadFold;<br>
-<br>
-          // MI is replaced with FoldMI.<br>
-          Changed = true;<br>
-          continue;<br>
+        const MCInstrDesc &MIDesc = MI->getDesc();<br>
+        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();<br>
+             ++i) {<br>
+          const MachineOperand &MOp = MI->getOperand(i);<br>
+          if (!MOp.isReg())<br>
+            continue;<br>
+          unsigned TryFoldReg = MOp.getReg();<br>
+          if (FoldAsLoadDefCandidates.count(TryFoldReg)) {<br>
+            MachineInstr *DefMI = 0;<br>
+            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, TryFoldReg,<br>
+                                                          DefMI);<br>
+            if (FoldMI) {<br>
+              // Update LocalMIs since we replaced MI with FoldMI and deleted<br>
+              // DefMI.<br>
+              DEBUG(dbgs() << "Replacing: " << *MI);<br>
+              DEBUG(dbgs() << "     With: " << *FoldMI);<br>
+              LocalMIs.erase(MI);<br>
+              LocalMIs.erase(DefMI);<br>
+              LocalMIs.insert(FoldMI);<br>
+              MI->eraseFromParent();<br>
+              DefMI->eraseFromParent();<br>
+              MRI->markUsesInDebugValueAsUndef(TryFoldReg);<br>
+              FoldAsLoadDefCandidates.erase(TryFoldReg);<br>
+              ++NumLoadFold;<br>
+              // MI is replaced with FoldMI.<br>
+              Changed = true;<br>
+              break;<br>
+            }<br>
+          }<br>
         }<br>
       }<br>
     }<br>
<br>
Added: llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll?rev=205481&view=auto" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll?rev=205481&view=auto</a><br>



==============================================================================<br>
--- llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll (added)<br>
+++ llvm/trunk/test/CodeGen/X86/peephole-multiple-folds.ll Wed Apr  2 17:59:58 2014<br>
@@ -0,0 +1,29 @@<br>
+; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s<br>
+;<br>
+; Test multiple peephole-time folds in a single basic block.<br>
+; &lt;rdar://problem/16478629&gt;<br>
+<br>
+define <8 x float> @test_peephole_multi_fold(<8 x float>* %p1, <8 x float>* %p2) {<br>
+entry:<br>
+  br label %loopbody<br>
+<br>
+loopbody:<br>
+; CHECK: _test_peephole_multi_fold:<br>
+; CHECK: vfmadd231ps (%rdi),<br>
+; CHECK: vfmadd231ps (%rsi),<br>
+  %vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [ zeroinitializer, %entry ]<br>
+  %vsum2 = phi <8 x float> [ %vsum2.next, %loopbody ], [ zeroinitializer, %entry ]<br>
+  %m1 = load <8 x float>* %p1, align 1<br>
+  %m2 = load <8 x float>* %p2, align 1<br>
+  %vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m1, <8 x float> zeroinitializer, <8 x float> %vsum1)<br>
+  %vsum2.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m2, <8 x float> zeroinitializer, <8 x float> %vsum2)<br>
+  %vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0<br>
+  %c = fcmp oeq float %vsum1.next.1, 0.0<br>
+  br i1 %c, label %loopbody, label %loopexit<br>
+<br>
+loopexit:<br>
+  %r = fadd <8 x float> %vsum1.next, %vsum2.next<br>
+  ret <8 x float> %r<br>
+}<br>
+<br>
+declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu" target="_blank">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br><br clear="all"><div><br></div></div></div><span><font color="#888888">-- <br><div>Alexey Samsonov, MSK</div>
</font></span></div>
</blockquote></div><br><br clear="all"><div><br></div></div></div><span class="HOEnZb"><font color="#888888">-- <br><div>Alexey Samsonov, MSK</div>
</font></span></div>
</blockquote></div><br></div>