[llvm] r193853 - LoopVectorizer: If dependency checks fail try runtime checks

Thu Oct 31 20:36:40 PDT 2013

Arnold,

FWIW, this looks related to a question that I asked earlier today: http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-October/067083.html

If a guard already exists (as in the example from my post), and the vectorizer also adds one, does anything after the vectorizer clean that up?

Thanks again,
Hal

----- Original Message -----
> Author: arnolds
> Date: Thu Oct 31 22:05:07 2013
> New Revision: 193853
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=193853&view=rev
> Log:
> LoopVectorizer: If dependency checks fail try runtime checks
> 
> When a dependence check fails we can still try to vectorize loops
> with runtime
> array bounds checks.
> 
> This helps linpack to vectorize a loop in dgefa. And we are back to
> 2x of the
> scalar performance on a corei7-avx.
> 
> radar://15339680
> 
> Modified:
>     llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>     llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll
> 
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=193853&r1=193852&r2=193853&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Oct 31
> 22:05:07 2013
> @@ -3061,7 +3061,7 @@ public:
>    /// non-intersection.
>    bool
>    canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck
>    &RtCheck,
>                         unsigned &NumComparisons, ScalarEvolution
>                         *SE,
> -                       Loop *TheLoop);
> +                       Loop *TheLoop, bool ShouldCheckStride =
> false);
>  
>    /// \brief Goes over all memory accesses, checks whether a RT
>    check is needed
>    /// and builds sets of dependent accesses.
> @@ -3075,6 +3075,7 @@ public:
>    bool isRTCheckNeeded() { return IsRTCheckNeeded; }
>  
>    bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
> +  void resetDepChecks() { CheckDeps.clear(); }
>  
>    MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
>  
> @@ -3129,10 +3130,15 @@ static bool hasComputableBounds(ScalarEv
>    return AR->isAffine();
>  }
>  
> +/// \brief Check the stride of the pointer and ensure that it does
> not wrap in
> +/// the address space.
> +static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value
> *Ptr,
> +                        const Loop *Lp);
> +
>  bool AccessAnalysis::canCheckPtrAtRT(
>                         LoopVectorizationLegality::RuntimePointerCheck
>                         &RtCheck,
>                          unsigned &NumComparisons, ScalarEvolution
>                          *SE,
> -                        Loop *TheLoop) {
> +                        Loop *TheLoop, bool ShouldCheckStride) {
>    // Find pointers with computable bounds. We are going to use this
>    information
>    // to place a runtime bound check.
>    unsigned NumReadPtrChecks = 0;
> @@ -3160,7 +3166,10 @@ bool AccessAnalysis::canCheckPtrAtRT(
>      else
>        ++NumReadPtrChecks;
>  
> -    if (hasComputableBounds(SE, Ptr)) {
> +    if (hasComputableBounds(SE, Ptr) &&
> +        // When we run after a failing dependency check we have to
> make sure we
> +        // don't have wrapping pointers.
> +        (!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) ==
> 1)) {
>        // The id of the dependence set.
>        unsigned DepId;
>  
> @@ -3342,8 +3351,9 @@ public:
>    typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
>    typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
>  
> -  MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop
> *L) :
> -    SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0) {}
> +  MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop
> *L)
> +      : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
> +        ShouldRetryWithRuntimeCheck(false) {}
>  
>    /// \brief Register the location (instructions are given
>    increasing numbers)
>    /// of a write access.
> @@ -3373,6 +3383,10 @@ public:
>    /// the accesses safely with.
>    unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
>  
> +  /// \brief In some cases when the dependency check fails we can
> still
> +  /// vectorize the loop with a dynamic array access check.
> +  bool shouldRetryWithRuntimeCheck() { return
> ShouldRetryWithRuntimeCheck; }
> +
>  private:
>    ScalarEvolution *SE;
>    DataLayout *DL;
> @@ -3390,6 +3404,10 @@ private:
>    // We can access this many bytes in parallel safely.
>    unsigned MaxSafeDepDistBytes;
>  
> +  /// \brief If we see a non constant dependence distance we can
> still try to
> +  /// vectorize this loop with runtime checks.
> +  bool ShouldRetryWithRuntimeCheck;
> +
>    /// \brief Check whether there is a plausible dependence between
>    the two
>    /// accesses.
>    ///
> @@ -3587,6 +3605,7 @@ bool MemoryDepChecker::isDependent(const
>    const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
>    if (!C) {
>      DEBUG(dbgs() << "LV: Dependence because of non constant
>      distance\n");
> +    ShouldRetryWithRuntimeCheck = true;
>      return true;
>    }
>  
> @@ -3876,6 +3895,29 @@ bool LoopVectorizationLegality::canVecto
>      CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
>                                         Accesses.getDependenciesToCheck());
>      MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
> +
> +    if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
> +      DEBUG(dbgs() << "LV: Retrying with memory checks\n");
> +      NeedRTCheck = true;
> +
> +      // Clear the dependency checks. We assume they are not needed.
> +      Accesses.resetDepChecks();
> +
> +      PtrRtCheck.reset();
> +      PtrRtCheck.Need = true;
> +
> +      CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons,
> SE,
> +                                         TheLoop, true);
> +      // Check that we did not collect too many pointers or find an
> unsizeable
> +      // pointer.
> +      if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold)
> {
> +        DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
> +        PtrRtCheck.reset();
> +        return false;
> +      }
> +
> +      CanVecMem = true;
> +    }
>    }
>  
>    DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll?rev=193853&r1=193852&r2=193853&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll Thu Oct
> 31 22:05:07 2013
> @@ -34,3 +34,31 @@ for.body:
>  for.end:                                          ; preds =
>  %for.body, %entry
>    ret i32 undef
>  }
> +
> +; Make sure that we try to vectorize loops with a runtime check if
> the
> +; dependency check fails.
> +
> +; CHECK-LABEL: test_runtime_check
> +; CHECK:      <4 x float>
> +define void @test_runtime_check(float* %a, float %b, i64 %offset,
> i64 %offset2, i64 %n) {
> +entry:
> +  br label %for.body
> +
> +for.body:
> +  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
> +  %ind.sum = add i64 %iv, %offset
> +  %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
> +  %l1 = load float* %arr.idx, align 4
> +  %ind.sum2 = add i64 %iv, %offset2
> +  %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
> +  %l2 = load float* %arr.idx2, align 4
> +  %m = fmul fast float %b, %l2
> +  %ad = fadd fast float %l1, %m
> +  store float %ad, float* %arr.idx, align 4
> +  %iv.next = add nuw nsw i64 %iv, 1
> +  %exitcond = icmp eq i64 %iv.next, %n
> +  br i1 %exitcond, label %loopexit, label %for.body
> +
> +loopexit:
> +  ret void
> +}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory