[llvm] r193853 - LoopVectorizer: If dependency checks fail try runtime checks
Hal Finkel
hfinkel at anl.gov
Thu Oct 31 20:36:40 PDT 2013
Arnold,
FWIW, this looks related to a question that I asked earlier today: http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-October/067083.html
If a guard already exists (as in the example from my post), and the vectorizer also adds one, does anything after the vectorizer clean that up?
Thanks again,
Hal
----- Original Message -----
> Author: arnolds
> Date: Thu Oct 31 22:05:07 2013
> New Revision: 193853
>
> URL: http://llvm.org/viewvc/llvm-project?rev=193853&view=rev
> Log:
> LoopVectorizer: If dependency checks fail try runtime checks
>
> When a dependence check fails we can still try to vectorize loops
> with runtime
> array bounds checks.
>
> This helps linpack to vectorize a loop in dgefa. And we are back to
> 2x of the
> scalar performance on a corei7-avx.
>
> radar://15339680
>
> Modified:
> llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=193853&r1=193852&r2=193853&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Oct 31
> 22:05:07 2013
> @@ -3061,7 +3061,7 @@ public:
> /// non-intersection.
> bool
> canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck
> &RtCheck,
> unsigned &NumComparisons, ScalarEvolution
> *SE,
> - Loop *TheLoop);
> + Loop *TheLoop, bool ShouldCheckStride =
> false);
>
> /// \brief Goes over all memory accesses, checks whether a RT
> check is needed
> /// and builds sets of dependent accesses.
> @@ -3075,6 +3075,7 @@ public:
> bool isRTCheckNeeded() { return IsRTCheckNeeded; }
>
> bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
> + void resetDepChecks() { CheckDeps.clear(); }
>
> MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
>
> @@ -3129,10 +3130,15 @@ static bool hasComputableBounds(ScalarEv
> return AR->isAffine();
> }
>
> +/// \brief Check the stride of the pointer and ensure that it does
> not wrap in
> +/// the address space.
> +static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value
> *Ptr,
> + const Loop *Lp);
> +
> bool AccessAnalysis::canCheckPtrAtRT(
> LoopVectorizationLegality::RuntimePointerCheck
> &RtCheck,
> unsigned &NumComparisons, ScalarEvolution
> *SE,
> - Loop *TheLoop) {
> + Loop *TheLoop, bool ShouldCheckStride) {
> // Find pointers with computable bounds. We are going to use this
> information
> // to place a runtime bound check.
> unsigned NumReadPtrChecks = 0;
> @@ -3160,7 +3166,10 @@ bool AccessAnalysis::canCheckPtrAtRT(
> else
> ++NumReadPtrChecks;
>
> - if (hasComputableBounds(SE, Ptr)) {
> + if (hasComputableBounds(SE, Ptr) &&
> + // When we run after a failing dependency check we have to
> make sure we
> + // don't have wrapping pointers.
> + (!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) ==
> 1)) {
> // The id of the dependence set.
> unsigned DepId;
>
> @@ -3342,8 +3351,9 @@ public:
> typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
> typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
>
> - MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop
> *L) :
> - SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0) {}
> + MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop
> *L)
> + : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
> + ShouldRetryWithRuntimeCheck(false) {}
>
> /// \brief Register the location (instructions are given
> increasing numbers)
> /// of a write access.
> @@ -3373,6 +3383,10 @@ public:
> /// the accesses safely with.
> unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
>
> + /// \brief In some cases when the dependency check fails we can
> still
> + /// vectorize the loop with a dynamic array access check.
> + bool shouldRetryWithRuntimeCheck() { return
> ShouldRetryWithRuntimeCheck; }
> +
> private:
> ScalarEvolution *SE;
> DataLayout *DL;
> @@ -3390,6 +3404,10 @@ private:
> // We can access this many bytes in parallel safely.
> unsigned MaxSafeDepDistBytes;
>
> + /// \brief If we see a non constant dependence distance we can
> still try to
> + /// vectorize this loop with runtime checks.
> + bool ShouldRetryWithRuntimeCheck;
> +
> /// \brief Check whether there is a plausible dependence between
> the two
> /// accesses.
> ///
> @@ -3587,6 +3605,7 @@ bool MemoryDepChecker::isDependent(const
> const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
> if (!C) {
> DEBUG(dbgs() << "LV: Dependence because of non constant
> distance\n");
> + ShouldRetryWithRuntimeCheck = true;
> return true;
> }
>
> @@ -3876,6 +3895,29 @@ bool LoopVectorizationLegality::canVecto
> CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
> Accesses.getDependenciesToCheck());
> MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
> +
> + if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
> + DEBUG(dbgs() << "LV: Retrying with memory checks\n");
> + NeedRTCheck = true;
> +
> + // Clear the dependency checks. We assume they are not needed.
> + Accesses.resetDepChecks();
> +
> + PtrRtCheck.reset();
> + PtrRtCheck.Need = true;
> +
> + CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons,
> SE,
> + TheLoop, true);
> + // Check that we did not collect too many pointers or find an
> unsizeable
> + // pointer.
> + if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold)
> {
> + DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
> + PtrRtCheck.reset();
> + return false;
> + }
> +
> + CanVecMem = true;
> + }
> }
>
> DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
>
> Modified: llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll?rev=193853&r1=193852&r2=193853&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll Thu Oct
> 31 22:05:07 2013
> @@ -34,3 +34,31 @@ for.body:
> for.end: ; preds =
> %for.body, %entry
> ret i32 undef
> }
> +
> +; Make sure that we try to vectorize loops with a runtime check if
> the
> +; dependency check fails.
> +
> +; CHECK-LABEL: test_runtime_check
> +; CHECK: <4 x float>
> +define void @test_runtime_check(float* %a, float %b, i64 %offset,
> i64 %offset2, i64 %n) {
> +entry:
> + br label %for.body
> +
> +for.body:
> + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
> + %ind.sum = add i64 %iv, %offset
> + %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
> + %l1 = load float* %arr.idx, align 4
> + %ind.sum2 = add i64 %iv, %offset2
> + %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
> + %l2 = load float* %arr.idx2, align 4
> + %m = fmul fast float %b, %l2
> + %ad = fadd fast float %l1, %m
> + store float %ad, float* %arr.idx, align 4
> + %iv.next = add nuw nsw i64 %iv, 1
> + %exitcond = icmp eq i64 %iv.next, %n
> + br i1 %exitcond, label %loopexit, label %for.body
> +
> +loopexit:
> + ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory
More information about the llvm-commits
mailing list