[llvm] r207512 - [OPENMP][LV][D3423] Respect Hints.Force meta-data for loops in LoopVectorizer

Tue Apr 29 02:40:15 PDT 2014

I hope r207514 fixes it.

On Tue, Apr 29, 2014 at 1:33 PM, Kostya Serebryany <kcc at google.com> wrote:

> This causes build breakage in release mode:
> lib/Transforms/Vectorize/LoopVectorize.cpp:5099:15: error: unused variable
> 'ScalarCost' [-Werror,-Wunused-variable]
>   const float ScalarCost = Cost;
>
> (The constant is used only in a DEBUG() statement, which expands to nothing in Release mode.)
>
>
>
> On Tue, Apr 29, 2014 at 12:55 PM, Zinovy Nis <zinovy.nis at gmail.com> wrote:
>
>> Author: zinovy.nis
>> Date: Tue Apr 29 03:55:11 2014
>> New Revision: 207512
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=207512&view=rev
>> Log:
>> [OPENMP][LV][D3423] Respect Hints.Force meta-data for loops in
>> LoopVectorizer
>>
>> Added:
>>     llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
>>
>> llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
>> Modified:
>>     llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>>
>> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=207512&r1=207511&r2=207512&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Apr 29
>> 03:55:11 2014
>> @@ -819,7 +819,8 @@ public:
>>    /// then this vectorization factor will be selected if vectorization is
>>    /// possible.
>>    VectorizationFactor selectVectorizationFactor(bool OptForSize,
>> -                                                unsigned UserVF);
>> +                                                unsigned UserVF,
>> +                                                bool ForceVectorization);
>>
>>    /// \return The size (in bits) of the widest type in the code that
>>    /// needs to be vectorized. We ignore values that remain scalar such as
>> @@ -891,13 +892,17 @@ struct LoopVectorizeHints {
>>    unsigned Width;
>>    /// Vectorization unroll factor.
>>    unsigned Unroll;
>> -  /// Vectorization forced (-1 not selected, 0 force disabled, 1 force
>> enabled)
>> -  int Force;
>> +  /// Vectorization forced
>> +  enum ForceKind {
>> +    FK_Undefined = -1, ///< Not selected.
>> +    FK_Disabled = 0,   ///< Forcing disabled.
>> +    FK_Enabled = 1,    ///< Forcing enabled.
>> +  } Force;
>>
>>    LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
>>    : Width(VectorizationFactor)
>>    , Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
>> -  , Force(-1)
>> +  , Force(FK_Undefined)
>>    , LoopID(L->getLoopID()) {
>>      getHints(L);
>>      // The command line options override any loop metadata except for
>> when
>> @@ -1010,7 +1015,8 @@ private:
>>          DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
>>      } else if (Hint == "enable") {
>>        if (C->getBitWidth() == 1)
>> -        Force = Val;
>> +        Force = Val == 1 ? LoopVectorizeHints::FK_Enabled
>> +                         : LoopVectorizeHints::FK_Disabled;
>>        else
>>          DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n");
>>      } else {
>> @@ -1106,18 +1112,20 @@ struct LoopVectorize : public FunctionPa
>>      LoopVectorizeHints Hints(L, DisableUnrolling);
>>
>>      DEBUG(dbgs() << "LV: Loop hints:"
>> -                 << " force=" << (Hints.Force == 0
>> -                                      ? "disabled"
>> -                                      : (Hints.Force == 1 ? "enabled" :
>> "?"))
>> -                 << " width=" << Hints.Width << " unroll=" <<
>> Hints.Unroll
>> -                 << "\n");
>> +                 << " force="
>> +                 << (Hints.Force == LoopVectorizeHints::FK_Disabled
>> +                         ? "disabled"
>> +                         : (Hints.Force == LoopVectorizeHints::FK_Enabled
>> +                                ? "enabled"
>> +                                : "?")) << " width=" << Hints.Width
>> +                 << " unroll=" << Hints.Unroll << "\n");
>>
>> -    if (Hints.Force == 0) {
>> +    if (Hints.Force == LoopVectorizeHints::FK_Disabled) {
>>        DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize
>> disable.\n");
>>        return false;
>>      }
>>
>> -    if (!AlwaysVectorize && Hints.Force != 1) {
>> +    if (!AlwaysVectorize && Hints.Force !=
>> LoopVectorizeHints::FK_Enabled) {
>>        DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize
>> enable.\n");
>>        return false;
>>      }
>> @@ -1127,6 +1135,21 @@ struct LoopVectorize : public FunctionPa
>>        return false;
>>      }
>>
>> +    // Check the loop for a trip count threshold:
>> +    // do not vectorize loops with a tiny trip count.
>> +    BasicBlock *Latch = L->getLoopLatch();
>> +    const unsigned TC = SE->getSmallConstantTripCount(L, Latch);
>> +    if (TC > 0u && TC < TinyTripCountVectorThreshold) {
>> +      DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
>> +                   << "This loop is not worth vectorizing.");
>> +      if (Hints.Force == LoopVectorizeHints::FK_Enabled)
>> +        DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
>> +      else {
>> +        DEBUG(dbgs() << "\n");
>> +        return false;
>> +      }
>> +    }
>> +
>>      // Check if it is legal to vectorize the loop.
>>      LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
>>      if (!LVL.canVectorize()) {
>> @@ -1140,8 +1163,8 @@ struct LoopVectorize : public FunctionPa
>>      // Check the function attributes to find out if this function should
>> be
>>      // optimized for size.
>>      Function *F = L->getHeader()->getParent();
>> -    bool OptForSize =
>> -        Hints.Force != 1 &&
>> F->hasFnAttribute(Attribute::OptimizeForSize);
>> +    bool OptForSize = Hints.Force != LoopVectorizeHints::FK_Enabled &&
>> +                      F->hasFnAttribute(Attribute::OptimizeForSize);
>>
>>      // Compute the weighted frequency of this loop being executed and
>> see if it
>>      // is less than 20% of the function entry baseline frequency. Note
>> that we
>> @@ -1150,7 +1173,8 @@ struct LoopVectorize : public FunctionPa
>>      // exactly what block frequency models.
>>      if (LoopVectorizeWithBlockFrequency) {
>>        BlockFrequency LoopEntryFreq =
>> BFI->getBlockFreq(L->getLoopPreheader());
>> -      if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
>> +      if (Hints.Force != LoopVectorizeHints::FK_Enabled &&
>> +          LoopEntryFreq < ColdEntryFreq)
>>          OptForSize = true;
>>      }
>>
>> @@ -1166,7 +1190,10 @@ struct LoopVectorize : public FunctionPa
>>
>>      // Select the optimal vectorization factor.
>>      const LoopVectorizationCostModel::VectorizationFactor VF =
>> -                          CM.selectVectorizationFactor(OptForSize,
>> Hints.Width);
>> +        CM.selectVectorizationFactor(OptForSize, Hints.Width,
>> +                                     Hints.Force ==
>> +                                         LoopVectorizeHints::FK_Enabled);
>> +
>>      // Select the unroll factor.
>>      const unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll,
>> VF.Width,
>>                                          VF.Cost);
>> @@ -3300,15 +3327,6 @@ bool LoopVectorizationLegality::canVecto
>>      return false;
>>    }
>>
>> -  // Do not loop-vectorize loops with a tiny trip count.
>> -  BasicBlock *Latch = TheLoop->getLoopLatch();
>> -  unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
>> -  if (TC > 0u && TC < TinyTripCountVectorThreshold) {
>> -    DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
>> -          "This loop is not worth vectorizing.\n");
>> -    return false;
>> -  }
>> -
>>    // Check if we can vectorize the instructions and CFG in this loop.
>>    if (!canVectorizeInstrs()) {
>>      DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
>> @@ -5007,7 +5025,8 @@ bool LoopVectorizationLegality::blockCan
>>
>>  LoopVectorizationCostModel::VectorizationFactor
>>  LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
>> -                                                      unsigned UserVF) {
>> +                                                      unsigned UserVF,
>> +                                                      bool
>> ForceVectorization) {
>>    // Width 1 means no vectorize
>>    VectorizationFactor Factor = { 1U, 0U };
>>    if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
>> @@ -5077,8 +5096,16 @@ LoopVectorizationCostModel::selectVector
>>    }
>>
>>    float Cost = expectedCost(1);
>> +  const float ScalarCost = Cost;
>>    unsigned Width = 1;
>>    DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n");
>> +
>> +  // Ignore scalar width, because the user explicitly wants
>> vectorization.
>> +  if (ForceVectorization && VF > 1) {
>> +    Width = 2;
>> +    Cost = expectedCost(Width) / (float)Width;
>> +  }
>> +
>>    for (unsigned i=2; i <= VF; i*=2) {
>>      // Notice that the vector loop needs to be executed less times, so
>>      // we need to divide the cost of the vector loops by the width of
>> @@ -5092,6 +5119,9 @@ LoopVectorizationCostModel::selectVector
>>      }
>>    }
>>
>> +  DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs()
>> +        << "LV: Vectorization seems to be not beneficial, "
>> +        << "but was forced by a user.\n");
>>    DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n");
>>    Factor.Width = Width;
>>    Factor.Cost = Width * Cost;
>>
>> Added: llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.ll?rev=207512&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.ll (added)
>> +++ llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.ll Tue
>> Apr 29 03:55:11 2014
>> @@ -0,0 +1,93 @@
>> +; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0
>> -mcpu=corei7-avx -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
>> +; REQUIRES: asserts
>> +
>> +; CHECK: LV: Loop hints: force=enabled
>> +; CHECK: LV: Loop hints: force=?
>> +; No more loops in the module
>> +; CHECK-NOT: LV: Loop hints: force=
>> +; CHECK: 2 loop-vectorize               - Number of loops analyzed for
>> vectorization
>> +; CHECK: 1 loop-vectorize               - Number of loops vectorized
>> +
>> +target datalayout =
>> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>> +target triple = "x86_64-apple-macosx10.8.0"
>> +
>> +;
>> +; The source code for the test:
>> +;
>> +; #include <math.h>
>> +; void foo(float* restrict A, float * restrict B, int size)
>> +; {
>> +;   for (int i = 0; i < size; ++i) A[i] = sinf(B[i]);
>> +; }
>> +;
>> +
>> +;
>> +; This loop will be vectorized, although the scalar cost is lower than
>> any of vector costs, but vectorization is explicitly forced in metadata.
>> +;
>> +
>> +define void @vectorized(float* noalias nocapture %A, float* noalias
>> nocapture %B, i32 %size) {
>> +entry:
>> +  %cmp6 = icmp sgt i32 %size, 0
>> +  br i1 %cmp6, label %for.body.preheader, label %for.end
>> +
>> +for.body.preheader:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0,
>> %for.body.preheader ]
>> +  %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
>> +  %call = tail call float @llvm.sin.f32(float %0)
>> +  %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
>> +  store float %call, float* %arrayidx2, align 4,
>> !llvm.mem.parallel_loop_access !1
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp eq i32 %lftr.wideiv, %size
>> +  br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop
>> !1
>> +
>> +for.end.loopexit:
>> +  br label %for.end
>> +
>> +for.end:
>> +  ret void
>> +}
>> +
>> +!1 = metadata !{metadata !1, metadata !2}
>> +!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
>> +
>> +;
>> +; This method will not be vectorized, as scalar cost is lower than any
>> of vector costs.
>> +;
>> +
>> +define void @not_vectorized(float* noalias nocapture %A, float* noalias
>> nocapture %B, i32 %size) {
>> +entry:
>> +  %cmp6 = icmp sgt i32 %size, 0
>> +  br i1 %cmp6, label %for.body.preheader, label %for.end
>> +
>> +for.body.preheader:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0,
>> %for.body.preheader ]
>> +  %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
>> +  %call = tail call float @llvm.sin.f32(float %0)
>> +  %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
>> +  store float %call, float* %arrayidx2, align 4,
>> !llvm.mem.parallel_loop_access !3
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp eq i32 %lftr.wideiv, %size
>> +  br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop
>> !3
>> +
>> +for.end.loopexit:
>> +  br label %for.end
>> +
>> +for.end:
>> +  ret void
>> +}
>> +
>> +declare float @llvm.sin.f32(float) nounwind readnone
>> +
>> +; Dummy metadata
>> +!3 = metadata !{metadata !3}
>> +
>>
>> Added:
>> llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll?rev=207512&view=auto
>>
>> ==============================================================================
>> ---
>> llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
>> (added)
>> +++
>> llvm/trunk/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll Tue
>> Apr 29 03:55:11 2014
>> @@ -0,0 +1,73 @@
>> +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0
>> -mcpu=corei7-avx -debug-only=loop-vectorize -stats -S
>> -vectorizer-min-trip-count=21 2>&1 | FileCheck %s
>> +; REQUIRES: asserts
>> +
>> +; CHECK: LV: Loop hints: force=enabled
>> +; CHECK: LV: Loop hints: force=?
>> +; No more loops in the module
>> +; CHECK-NOT: LV: Loop hints: force=
>> +; CHECK: 2 loop-vectorize               - Number of loops analyzed for
>> vectorization
>> +; CHECK: 1 loop-vectorize               - Number of loops vectorized
>> +
>> +target datalayout =
>> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>> +target triple = "x86_64-apple-macosx10.8.0"
>> +
>> +;
>> +; The source code for the test:
>> +;
>> +; void foo(float* restrict A, float* restrict B)
>> +; {
>> +;     for (int i = 0; i < 20; ++i) A[i] += B[i];
>> +; }
>> +;
>> +
>> +;
>> +; This loop will be vectorized, although the trip count is below the
>> threshold, but vectorization is explicitly forced in metadata.
>> +;
>> +define void @vectorized(float* noalias nocapture %A, float* noalias
>> nocapture readonly %B) {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
>> +  %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
>> +  %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
>> +  %add = fadd fast float %0, %1
>> +  store float %add, float* %arrayidx2, align 4,
>> !llvm.mem.parallel_loop_access !1
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 20
>> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
>> +
>> +for.end:
>> +  ret void
>> +}
>> +
>> +!1 = metadata !{metadata !1, metadata !2}
>> +!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
>> +
>> +;
>> +; This loop will not be vectorized as the trip count is below the
>> threshold.
>> +;
>> +define void @not_vectorized(float* noalias nocapture %A, float* noalias
>> nocapture readonly %B) {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:
>> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> +  %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
>> +  %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
>> +  %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv
>> +  %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
>> +  %add = fadd fast float %0, %1
>> +  store float %add, float* %arrayidx2, align 4,
>> !llvm.mem.parallel_loop_access !3
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %exitcond = icmp eq i64 %indvars.iv.next, 20
>> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
>> +
>> +for.end:
>> +  ret void
>> +}
>> +
>> +!3 = metadata !{metadata !3}
>> +
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140429/8f9eb32b/attachment.html>