[llvm] r290355 - Fix two bugs in the pipeliner in renaming phis in the prolog and epilog

Thu Dec 22 10:49:55 PST 2016

Author: kparzysz
Date: Thu Dec 22 12:49:55 2016
New Revision: 290355

URL: http://llvm.org/viewvc/llvm-project?rev=290355&view=rev
Log:
Fix two bugs in the pipeliner in renaming phis in the prolog and epilog

When the pipeliner is renaming phi values, it may need to iterate through
the phi operands to check for other phis. However, the pipeliner should
stop once it reaches a phi that is outside the pipelined loop.

Also, when the generateExistingPhis code is unable to reuse an existing
phi, the default code that computes PhiOp2 should be used only when
the pipeliner is generating the kernel. Otherwise, the phi may be a value
computed earlier in the same epilog.

Patch by Brendon Cahoon.

Added:
    llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi10.ll
    llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi4.ll
Modified:
    llvm/trunk/lib/CodeGen/MachinePipeliner.cpp

Modified: llvm/trunk/lib/CodeGen/MachinePipeliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachinePipeliner.cpp?rev=290355&r1=290354&r2=290355&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/MachinePipeliner.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachinePipeliner.cpp Thu Dec 22 12:49:55 2016
@@ -2676,7 +2676,7 @@ void SwingSchedulerDAG::generateExisting
               replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
             continue;
           }
-        } else if (StageDiff > 0 &&
+        } else if (InKernel && StageDiff > 0 &&
                    VRMap[CurStageNum - StageDiff - np].count(LoopVal))
           PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
       }
@@ -3216,7 +3216,7 @@ unsigned SwingSchedulerDAG::getPrevMapVa
       // The previous name is defined in the current stage when the instruction
       // order is swapped.
       PrevVal = VRMap[StageNum][LoopVal];
-    else if (!LoopInst->isPHI())
+    else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
       // The loop value hasn't yet been scheduled.
       PrevVal = LoopVal;
     else if (StageNum == PhiStage + 1)

Added: llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi10.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi10.ll?rev=290355&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi10.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi10.ll Thu Dec 22 12:49:55 2016
@@ -0,0 +1,88 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s
+; REQUIRES: asserts
+
+define void @test(i8* noalias nocapture readonly %src, i32 %srcStride) local_unnamed_addr #0 {
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %src, i32 %srcStride
+  %add.ptr2 = getelementptr inbounds i8, i8* %add.ptr, i32 %srcStride
+  %add.ptr3 = getelementptr inbounds i8, i8* %add.ptr2, i32 %srcStride
+  br label %for.body9.epil
+
+for.body9.epil:
+  %inc.sink385.epil = phi i32 [ %add17.epil, %for.body9.epil ], [ 2, %entry ]
+  %sr.epil = phi i8 [ %0, %for.body9.epil ], [ undef, %entry ]
+  %sr431.epil = phi i8 [ %2, %for.body9.epil ], [ 0, %entry ]
+  %sr432.epil = phi i8 [ %sr431.epil, %for.body9.epil ], [ 0, %entry ]
+  %epil.iter = phi i32 [ %epil.iter.sub, %for.body9.epil ], [ undef, %entry ]
+  %sub11.epil = add i32 %inc.sink385.epil, -1
+  %add17.epil = add nuw i32 %inc.sink385.epil, 1
+  %conv19.epil = zext i8 %sr.epil to i32
+  %add21.epil = add i32 %inc.sink385.epil, 2
+  %arrayidx22.epil = getelementptr inbounds i8, i8* %src, i32 %add21.epil
+  %0 = load i8, i8* %arrayidx22.epil, align 1
+  %conv23.epil = zext i8 %0 to i32
+  %1 = load i8, i8* undef, align 1
+  %conv42.epil = zext i8 %1 to i32
+  %conv53.epil = zext i8 %sr432.epil to i32
+  %2 = load i8, i8* undef, align 1
+  %conv61.epil = zext i8 %2 to i32
+  %3 = load i8, i8* undef, align 1
+  %conv65.epil = zext i8 %3 to i32
+  %4 = load i8, i8* null, align 1
+  %conv69.epil = zext i8 %4 to i32
+  %5 = load i8, i8* undef, align 1
+  %conv72.epil = zext i8 %5 to i32
+  %6 = load i8, i8* undef, align 1
+  %conv76.epil = zext i8 %6 to i32
+  %7 = load i8, i8* undef, align 1
+  %conv80.epil = zext i8 %7 to i32
+  %8 = load i8, i8* undef, align 1
+  %conv84.epil = zext i8 %8 to i32
+  %9 = load i8, i8* undef, align 1
+  %conv88.epil = zext i8 %9 to i32
+  %10 = load i8, i8* undef, align 1
+  %conv91.epil = zext i8 %10 to i32
+  %11 = load i8, i8* undef, align 1
+  %conv95.epil = zext i8 %11 to i32
+  %12 = load i8, i8* undef, align 1
+  %conv99.epil = zext i8 %12 to i32
+  %add.epil = add nuw nsw i32 0, %conv19.epil
+  %add16.epil = add nuw nsw i32 %add.epil, 0
+  %add20.epil = add nuw nsw i32 %add16.epil, 0
+  %add24.epil = add nuw nsw i32 %add20.epil, 0
+  %add28.epil = add nuw nsw i32 %add24.epil, 0
+  %add32.epil = add nuw nsw i32 %add28.epil, 0
+  %add35.epil = add i32 %add32.epil, 0
+  %add39.epil = add i32 %add35.epil, 0
+  %add43.epil = add i32 %add39.epil, %conv53.epil
+  %add47.epil = add i32 %add43.epil, 0
+  %add51.epil = add i32 %add47.epil, 0
+  %add54.epil = add i32 %add51.epil, %conv23.epil
+  %add58.epil = add i32 %add54.epil, %conv42.epil
+  %add62.epil = add i32 %add58.epil, %conv61.epil
+  %add66.epil = add i32 %add62.epil, %conv65.epil
+  %add70.epil = add i32 %add66.epil, %conv69.epil
+  %add73.epil = add i32 %add70.epil, %conv72.epil
+  %add77.epil = add i32 %add73.epil, %conv76.epil
+  %add81.epil = add i32 %add77.epil, %conv80.epil
+  %add85.epil = add i32 %add81.epil, %conv84.epil
+  %add89.epil = add i32 %add85.epil, %conv88.epil
+  %add92.epil = add i32 %add89.epil, %conv91.epil
+  %add96.epil = add i32 %add92.epil, %conv95.epil
+  %add100.epil = add i32 %add96.epil, %conv99.epil
+  %mul.epil = mul nsw i32 %add100.epil, 2621
+  %add101.epil = add nsw i32 %mul.epil, 32768
+  %shr369.epil = lshr i32 %add101.epil, 16
+  %conv102.epil = trunc i32 %shr369.epil to i8
+  %arrayidx103.epil = getelementptr inbounds i8, i8* undef, i32 %inc.sink385.epil
+  store i8 %conv102.epil, i8* %arrayidx103.epil, align 1
+  %epil.iter.sub = add i32 %epil.iter, -1
+  %epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0
+  br i1 %epil.iter.cmp, label %for.end, label %for.body9.epil
+
+for.end:
+  unreachable
+}
+
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "unsafe-fp-math"="false" "use-soft-float"="false" }
+

Added: llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi4.ll?rev=290355&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi4.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/swp-prolog-phi4.ll Thu Dec 22 12:49:55 2016
@@ -0,0 +1,65 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -verify-machineinstrs < %s
+
+; Test that the name rewriter code doesn't chase the Phi operands for
+; Phis that do not occur in the loop that is being pipelined.
+
+define void @test(i32 %srcStride) local_unnamed_addr #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %add.ptr3.pn = phi i8* [ undef, %entry ], [ %src4.0394, %for.end ]
+  %src2.0390 = phi i8* [ undef, %entry ], [ %add.ptr3.pn, %for.end ]
+  %src4.0394 = getelementptr inbounds i8, i8* %add.ptr3.pn, i32 %srcStride
+  %sri414 = load i8, i8* undef, align 1
+  br i1 undef, label %for.body9.epil, label %for.body9.preheader.new
+
+for.body9.preheader.new:
+  br label %for.body9.epil
+
+for.body9.epil:
+  %inc.sink385.epil = phi i32 [ %add17.epil, %for.body9.epil ], [ 2, %for.body ], [ undef, %for.body9.preheader.new ]
+  %sr420.epil = phi i8 [ undef, %for.body9.epil ], [ %sri414, %for.body ], [ undef, %for.body9.preheader.new ]
+  %sr421.epil = phi i8 [ %sr420.epil, %for.body9.epil ], [ undef, %for.body ], [ undef, %for.body9.preheader.new ]
+  %sr422.epil = phi i8 [ %sr421.epil, %for.body9.epil ], [ 0, %for.body ], [ undef, %for.body9.preheader.new ]
+  %epil.iter = phi i32 [ %epil.iter.sub, %for.body9.epil ], [ undef, %for.body9.preheader.new ], [ undef, %for.body ]
+  %add17.epil = add nuw i32 %inc.sink385.epil, 1
+  %add21.epil = add i32 %inc.sink385.epil, 2
+  %arrayidx22.epil = getelementptr inbounds i8, i8* undef, i32 %add21.epil
+  %conv27.epil = zext i8 %sr422.epil to i32
+  %0 = load i8, i8* null, align 1
+  %conv61.epil = zext i8 %0 to i32
+  %arrayidx94.epil = getelementptr inbounds i8, i8* %src4.0394, i32 %add17.epil
+  %1 = load i8, i8* %arrayidx94.epil, align 1
+  %add35.epil = add i32 0, %conv27.epil
+  %add39.epil = add i32 %add35.epil, 0
+  %add43.epil = add i32 %add39.epil, 0
+  %add47.epil = add i32 %add43.epil, 0
+  %add51.epil = add i32 %add47.epil, 0
+  %add54.epil = add i32 %add51.epil, 0
+  %add58.epil = add i32 %add54.epil, 0
+  %add62.epil = add i32 %add58.epil, %conv61.epil
+  %add66.epil = add i32 %add62.epil, 0
+  %add70.epil = add i32 %add66.epil, 0
+  %add73.epil = add i32 %add70.epil, 0
+  %add77.epil = add i32 %add73.epil, 0
+  %add81.epil = add i32 %add77.epil, 0
+  %add85.epil = add i32 %add81.epil, 0
+  %add89.epil = add i32 %add85.epil, 0
+  %add92.epil = add i32 %add89.epil, 0
+  %add96.epil = add i32 %add92.epil, 0
+  %add100.epil = add i32 %add96.epil, 0
+  %mul.epil = mul nsw i32 %add100.epil, 2621
+  %add101.epil = add nsw i32 %mul.epil, 32768
+  %shr369.epil = lshr i32 %add101.epil, 16
+  %conv102.epil = trunc i32 %shr369.epil to i8
+  store i8 %conv102.epil, i8* undef, align 1
+  %epil.iter.sub = add i32 %epil.iter, -1
+  %epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0
+  br i1 %epil.iter.cmp, label %for.end, label %for.body9.epil
+
+for.end:
+  br label %for.body
+}
+
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "unsafe-fp-math"="false" "use-soft-float"="false" }