[llvm] r265800 - [LoopVectorize] Register cloned assumptions

David Majnemer via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 8 09:37:10 PDT 2016


Author: majnemer
Date: Fri Apr  8 11:37:10 2016
New Revision: 265800

URL: http://llvm.org/viewvc/llvm-project?rev=265800&view=rev
Log:
[LoopVectorize] Register cloned assumptions

InstCombine cannot effectively remove redundant assumptions without them
registered in the assumption cache.  The vectorizer can create identical
assumptions but doesn't register them with the cache, resulting in
slower compile times because InstCombine tries to reason about a lot
more assumptions.

Fix this by registering the cloned assumptions.

Added:
    llvm/trunk/test/Transforms/LoopVectorize/X86/register-assumption.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=265800&r1=265799&r2=265800&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Fri Apr  8 11:37:10 2016
@@ -314,13 +314,13 @@ public:
   InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
                       LoopInfo *LI, DominatorTree *DT,
                       const TargetLibraryInfo *TLI,
-                      const TargetTransformInfo *TTI, unsigned VecWidth,
-                      unsigned UnrollFactor)
+                      const TargetTransformInfo *TTI, AssumptionCache *AC,
+                      unsigned VecWidth, unsigned UnrollFactor)
       : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
-        VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()),
-        Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
-        TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr),
-        AddedSafetyChecks(false) {}
+        AC(AC), VF(VecWidth), UF(UnrollFactor),
+        Builder(PSE.getSE()->getContext()), Induction(nullptr),
+        OldInduction(nullptr), WidenMap(UnrollFactor), TripCount(nullptr),
+        VectorTripCount(nullptr), Legal(nullptr), AddedSafetyChecks(false) {}
 
   // Perform the actual loop widening (vectorization).
   // MinimumBitWidths maps scalar integer values to the smallest bitwidth they
@@ -524,6 +524,8 @@ protected:
   const TargetLibraryInfo *TLI;
   /// Target Transform Info.
   const TargetTransformInfo *TTI;
+  /// Assumption Cache.
+  AssumptionCache *AC;
 
   /// \brief LoopVersioning.  It's only set up (non-null) if memchecks were
   /// used.
@@ -591,8 +593,10 @@ public:
   InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
                     LoopInfo *LI, DominatorTree *DT,
                     const TargetLibraryInfo *TLI,
-                    const TargetTransformInfo *TTI, unsigned UnrollFactor)
-      : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
+                    const TargetTransformInfo *TTI, AssumptionCache *AC,
+                    unsigned UnrollFactor)
+      : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, 1,
+                            UnrollFactor) {}
 
 private:
   void scalarizeInstruction(Instruction *Instr,
@@ -1957,7 +1961,7 @@ struct LoopVectorize : public FunctionPa
       assert(IC > 1 && "interleave count should not be 1 or 0");
       // If we decided that it is not legal to vectorize the loop then
       // interleave it.
-      InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC);
+      InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC);
       Unroller.vectorize(&LVL, CM.MinBWs);
 
       emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
@@ -1965,7 +1969,7 @@ struct LoopVectorize : public FunctionPa
                                  Twine(IC) + ")");
     } else {
       // If we decided that it is *legal* to vectorize the loop then do it.
-      InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC);
+      InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC);
       LB.vectorize(&LVL, CM.MinBWs);
       ++LoopsVectorized;
 
@@ -2728,6 +2732,11 @@ void InnerLoopVectorizer::scalarizeInstr
       // Place the cloned scalar in the new loop.
       Builder.Insert(Cloned);
 
+      // If we just cloned a new assumption, add it to the assumption cache.
+      if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
+        if (II->getIntrinsicID() == Intrinsic::assume)
+          AC->registerAssumption(II);
+
       // If the original scalar returns a value we need to place it in a vector
       // so that future users will be able to use it.
       if (!IsVoidRetTy)
@@ -6096,6 +6105,11 @@ void InnerLoopUnroller::scalarizeInstruc
       // Place the cloned scalar in the new loop.
       Builder.Insert(Cloned);
 
+      // If we just cloned a new assumption, add it to the assumption cache.
+      if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
+        if (II->getIntrinsicID() == Intrinsic::assume)
+          AC->registerAssumption(II);
+
       // If the original scalar returns a value we need to place it in a vector
       // so that future users will be able to use it.
       if (!IsVoidRetTy)

Added: llvm/trunk/test/Transforms/LoopVectorize/X86/register-assumption.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/register-assumption.ll?rev=265800&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/register-assumption.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/register-assumption.ll Fri Apr  8 11:37:10 2016
@@ -0,0 +1,32 @@
+; RUN: opt < %s  -loop-vectorize -instcombine -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+  %alloca = alloca float, align 4
+  br label %loop_exit.dim.11.critedge
+
+loop_exit.dim.11.critedge:                        ; preds = %loop_body.dim.0
+  %ptrint = ptrtoint float* %alloca to i64
+  %maskedptr = and i64 %ptrint, 4
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %loop_header.dim.017.preheader
+
+loop_header.dim.017.preheader:                    ; preds = %loop_exit.dim.016, %loop_exit.dim.11.critedge
+  br label %loop_body.dim.018
+
+loop_body.dim.018:                                ; preds = %loop_body.dim.018, %loop_header.dim.017.preheader
+  %invar_address.dim.019.0135 = phi i64 [ 0, %loop_header.dim.017.preheader ], [ %0, %loop_body.dim.018 ]
+  call void @llvm.assume(i1 %maskcond)
+; CHECK:     call void @llvm.assume(
+; CHECK-NOT: call void @llvm.assume(
+  %0 = add nuw nsw i64 %invar_address.dim.019.0135, 1
+  %1 = icmp eq i64 %0, 256
+  br i1 %1, label %loop_header.dim.017.preheader, label %loop_body.dim.018
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #0
+
+attributes #0 = { nounwind }




More information about the llvm-commits mailing list