[llvm] r210721 - Teach LoopUnrollPass to respect loop unrolling hints in metadata.

Hal Finkel hfinkel at anl.gov
Thu Jun 12 02:43:06 PDT 2014


----- Original Message -----
> From: "Eli Bendersky" <eliben at google.com>
> To: llvm-commits at cs.uiuc.edu
> Sent: Wednesday, June 11, 2014 6:15:35 PM
> Subject: [llvm] r210721 - Teach LoopUnrollPass to respect loop unrolling hints in metadata.
> 
> Author: eliben
> Date: Wed Jun 11 18:15:35 2014
> New Revision: 210721
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=210721&view=rev
> Log:
> Teach LoopUnrollPass to respect loop unrolling hints in metadata.
> 
> See http://reviews.llvm.org/D4090 for more details.
> 
> The Clang change that produces this metadata was committed in r210667
> 
> Patch by Mark Heffernan.
> 
> 
> Added:
>     llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
> Modified:
>     llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
> 
> Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=210721&r1=210720&r2=210721&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Wed Jun 11
> 18:15:35 2014
> @@ -20,6 +20,7 @@
>  #include "llvm/IR/DataLayout.h"
>  #include "llvm/IR/Dominators.h"
>  #include "llvm/IR/IntrinsicInst.h"
> +#include "llvm/IR/Metadata.h"
>  #include "llvm/Support/CommandLine.h"
>  #include "llvm/Support/Debug.h"
>  #include "llvm/Support/raw_ostream.h"
> @@ -36,7 +37,8 @@ UnrollThreshold("unroll-threshold", cl::
>  
>  static cl::opt<unsigned>
>  UnrollCount("unroll-count", cl::init(0), cl::Hidden,
> -  cl::desc("Use this unroll count for all loops, for testing
> purposes"));
> +  cl::desc("Use this unroll count for all loops including those with
> "
> +           "unroll_count pragma values, for testing purposes"));
>  
>  static cl::opt<bool>
>  UnrollAllowPartial("unroll-allow-partial", cl::init(false),
>  cl::Hidden,
> @@ -47,6 +49,13 @@ static cl::opt<bool>
>  UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false),
>  cl::Hidden,
>    cl::desc("Unroll loops with run-time trip counts"));
>  
> +// Maximum allowed unroll count for a loop being fully unrolled
> +// because of a pragma unroll(enable) statement (ie, metadata
> +// "llvm.loopunroll.enable" is true).  This prevents unexpected
> +// behavior like crashing when using this pragma on high trip count
> +// loops.
> +static const unsigned PragmaFullUnrollCountLimit = 1024;

Please make this a cl::opt! Also, it needs to be a limit on the final loop size, not on the unroll count itself (see the PartialThreshold variable near the top of runOnLoop).

> +
>  namespace {
>    class LoopUnroll : public LoopPass {
>    public:
> @@ -151,6 +160,63 @@ static unsigned ApproximateLoopSize(cons
>    return LoopSize;
>  }
>  
> +// Returns the value associated with the given metadata node name
> (for
> +// example, "llvm.loopunroll.count").  If no such named metadata
> node
> +// exists, then nullptr is returned.
> +static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
> +                                                 StringRef Name) {
> +  MDNode *LoopID = L->getLoopID();
> +  if (!LoopID) return nullptr;
> +
> +  // First operand should refer to the loop id itself.
> +  assert(LoopID->getNumOperands() > 0 && "requires at least one
> operand");
> +  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
> +
> +  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
> +    const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
> +    if (!MD) continue;
> +
> +    const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
> +    if (!S) continue;
> +
> +    if (Name.equals(S->getString())) {
> +      assert(MD->getNumOperands() == 2 &&
> +             "Unroll hint metadata should have two operands.");
> +      return cast<ConstantInt>(MD->getOperand(1));
> +    }
> +  }
> +  return nullptr;
> +}
> +
> +// Returns true if the loop has an unroll(enable) pragma.
> +static bool HasUnrollEnablePragma(const Loop *L) {
> +  const ConstantInt *EnableValue =
> +      GetUnrollMetadataValue(L, "llvm.loopunroll.enable");
> +  return (EnableValue && EnableValue->getZExtValue());
> +  return false;
> +}
> +
> +// Returns true if the loop has an unroll(disable) pragma.
> +static bool HasUnrollDisablePragma(const Loop *L) {
> +  const ConstantInt *EnableValue =
> +      GetUnrollMetadataValue(L, "llvm.loopunroll.enable");
> +  return (EnableValue && !EnableValue->getZExtValue());
> +  return false;
> +}
> +
> +// Check for unroll_count(N) pragma.  If found, return true and set
> +// Count to the integer parameter of the pragma.
> +static bool HasUnrollCountPragma(const Loop *L, int &Count) {
> +  const ConstantInt *CountValue =
> +      GetUnrollMetadataValue(L, "llvm.loopunroll.count");
> +  if (CountValue) {
> +    Count = CountValue->getZExtValue();
> +    assert(Count >= 1 && "Unroll count must be positive.");
> +    return true;
> +  }
> +  return false;
> +}
> +
>  bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
>    if (skipOptnoneFunction(L))
>      return false;
> @@ -202,12 +268,49 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPa
>      TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
>    }
>  
> -  bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime;
> +  // User-specified count (either as a command-line option or
> +  // constructor parameter) has highest precedence.
> +  unsigned Count = UserCount ? CurrentCount : 0;
> +
> +  // If there is no user-specified count, unroll pragmas have the
> next
> +  // highest precendence.
> +  if (Count == 0) {
> +    if (HasUnrollDisablePragma(L)) {
> +      // Loop has unroll(disable) pragma.
> +      return false;
> +    }
>  
> -  // Use a default unroll-count if the user doesn't specify a value
> -  // and the trip count is a run-time value.  The default is
> different
> -  // for run-time or compile-time trip count loops.
> -  unsigned Count = UserCount ? CurrentCount : UP.Count;
> +    int PragmaCount;
> +    if (HasUnrollCountPragma(L, PragmaCount)) {
> +      if (PragmaCount == 1) {
> +        // Nothing to do.
> +        return false;
> +      }
> +      Count = PragmaCount;
> +      Threshold = NoThreshold;
> +    } else if (HasUnrollEnablePragma(L)) {
> +      // Loop has unroll(enable) pragma without a unroll_count
> pragma,
> +      // so unroll loop fully if possible.
> +      if (TripCount == 0) {
> +        DEBUG(dbgs() << "  Loop has unroll(enable) pragma but loop
> cannot be "
> +                        "fully unrolled because trip count is
> unknown.\n");

This should not say "fully".

> +        // Continue with standard heuristic unrolling.
> +      } else if (TripCount > PragmaFullUnrollCountLimit) {
> +        DEBUG(dbgs() << "  Loop has unroll(enable) pragma but loop
> cannot be "
> +                        "fully unrolled because loop count is
> greater than "
> +                     << PragmaFullUnrollCountLimit);
> +        // Continue with standard heuristic unrolling.


This should also not say "fully". In addition, this is misleading because we might unroll this anyway (just not to the extent requested by the user).

Also, I think that we should be using emitOptimizationRemark here (like LoopVectorize does) instead of debug messages.

Thanks again,
Hal

> +      } else {
> +        Count = TripCount;
> +        Threshold = NoThreshold;
> +      }
> +    }
> +  }
> +
> +  if (Count == 0)
> +    Count = UP.Count;
> +
> +  bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime;
>    if (Runtime && Count == 0 && TripCount == 0)
>      Count = UnrollRuntimeCount;
>  
> 
> Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll?rev=210721&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll (added)
> +++ llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll Wed Jun
> 11 18:15:35 2014
> @@ -0,0 +1,285 @@
> +; RUN: opt < %s -loop-unroll -S | FileCheck %s
> +
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +; loop4 contains a small loop which should be completely unrolled by
> +; the default unrolling heuristics.  It serves as a control for the
> +; unroll(disable) pragma test loop4_with_disable.
> +;
> +; CHECK-LABEL: @loop4(
> +; CHECK-NOT: br i1
> +define void @loop4(i32* nocapture %a) {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds =
> %for.body, %entry
> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, 4
> +  br i1 %exitcond, label %for.end, label %for.body
> +
> +for.end:                                          ; preds =
> %for.body
> +  ret void
> +}
> +
> +; #pragma clang loop unroll(disable)
> +;
> +; CHECK-LABEL: @loop4_with_disable(
> +; CHECK: store i32
> +; CHECK-NOT: store i32
> +; CHECK: br i1
> +define void @loop4_with_disable(i32* nocapture %a) {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds =
> %for.body, %entry
> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, 4
> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
> +
> +for.end:                                          ; preds =
> %for.body
> +  ret void
> +}
> +!1 = metadata !{metadata !1, metadata !2}
> +!2 = metadata !{metadata !"llvm.loopunroll.enable", i1 false}
> +
> +; loop64 has a high enough count that it should *not* be unrolled by
> +; the default unrolling heuristic.  It serves as the control for the
> +; unroll(enable) pragma test loop64_with_.* tests below.
> +;
> +; CHECK-LABEL: @loop64(
> +; CHECK: store i32
> +; CHECK-NOT: store i32
> +; CHECK: br i1
> +define void @loop64(i32* nocapture %a) {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds =
> %for.body, %entry
> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, 64
> +  br i1 %exitcond, label %for.end, label %for.body
> +
> +for.end:                                          ; preds =
> %for.body
> +  ret void
> +}
> +
> +; #pragma clang loop unroll(enable)
> +; Loop should be fully unrolled.
> +;
> +; CHECK-LABEL: @loop64_with_enable(
> +; CHECK-NOT: br i1
> +define void @loop64_with_enable(i32* nocapture %a) {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds =
> %for.body, %entry
> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, 64
> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
> +
> +for.end:                                          ; preds =
> %for.body
> +  ret void
> +}
> +!3 = metadata !{metadata !3, metadata !4}
> +!4 = metadata !{metadata !"llvm.loopunroll.enable", i1 true}
> +
> +; #pragma clang loop unroll_count(4)
> +; Loop should be unrolled 4 times.
> +;
> +; CHECK-LABEL: @loop64_with_count4(
> +; CHECK: store i32
> +; CHECK: store i32
> +; CHECK: store i32
> +; CHECK: store i32
> +; CHECK-NOT: store i32
> +; CHECK: br i1
> +define void @loop64_with_count4(i32* nocapture %a) {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds =
> %for.body, %entry
> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, 64
> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
> +
> +for.end:                                          ; preds =
> %for.body
> +  ret void
> +}
> +!5 = metadata !{metadata !5, metadata !6}
> +!6 = metadata !{metadata !"llvm.loopunroll.count", i32 4}
> +
> +
> +; #pragma clang loop unroll_count(enable) unroll_count(4)
> +; Loop should be unrolled 4 times.
> +;
> +; CHECK-LABEL: @loop64_with_enable_and_count4(
> +; CHECK: store i32
> +; CHECK: store i32
> +; CHECK: store i32
> +; CHECK: store i32
> +; CHECK-NOT: store i32
> +; CHECK: br i1
> +define void @loop64_with_enable_and_count4(i32* nocapture %a) {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds =
> %for.body, %entry
> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, 64
> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
> +
> +for.end:                                          ; preds =
> %for.body
> +  ret void
> +}
> +!7 = metadata !{metadata !7, metadata !6, metadata !4}
> +
> +; #pragma clang loop unroll_count(enable)
> +; Full unrolling is requested, but loop has a dynamic trip count so
> +; no unrolling should occur.
> +;
> +; CHECK-LABEL: @dynamic_loop_with_enable(
> +; CHECK: store i32
> +; CHECK-NOT: store i32
> +; CHECK: br i1
> +define void @dynamic_loop_with_enable(i32* nocapture %a, i32 %b) {
> +entry:
> +  %cmp3 = icmp sgt i32 %b, 0
> +  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8
> +
> +for.body:                                         ; preds = %entry,
> %for.body
> +  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> +  %exitcond = icmp eq i32 %lftr.wideiv, %b
> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8
> +
> +for.end:                                          ; preds =
> %for.body, %entry
> +  ret void
> +}
> +!8 = metadata !{metadata !8, metadata !4}
> +
> +; #pragma clang loop unroll_count(4)
> +; Loop has a dynamic trip count.  Unrolling should occur, but no
> +; conditional branches can be removed.
> +;
> +; CHECK-LABEL: @dynamic_loop_with_count4(
> +; CHECK-NOT: store
> +; CHECK: br i1
> +; CHECK: store
> +; CHECK: br i1
> +; CHECK: store
> +; CHECK: br i1
> +; CHECK: store
> +; CHECK: br i1
> +; CHECK: store
> +; CHECK: br i1
> +; CHECK-NOT: br i1
> +define void @dynamic_loop_with_count4(i32* nocapture %a, i32 %b) {
> +entry:
> +  %cmp3 = icmp sgt i32 %b, 0
> +  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9
> +
> +for.body:                                         ; preds = %entry,
> %for.body
> +  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> +  %exitcond = icmp eq i32 %lftr.wideiv, %b
> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9
> +
> +for.end:                                          ; preds =
> %for.body, %entry
> +  ret void
> +}
> +!9 = metadata !{metadata !9, metadata !6}
> +
> +; #pragma clang loop unroll_count(1)
> +; Loop should not be unrolled
> +;
> +; CHECK-LABEL: @unroll_1(
> +; CHECK: store i32
> +; CHECK-NOT: store i32
> +; CHECK: br i1
> +define void @unroll_1(i32* nocapture %a, i32 %b) {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds =
> %for.body, %entry
> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, 4
> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10
> +
> +for.end:                                          ; preds =
> %for.body
> +  ret void
> +}
> +!10 = metadata !{metadata !10, metadata !11}
> +!11 = metadata !{metadata !"llvm.loopunroll.count", i32 1}
> +
> +; #pragma clang loop unroll(enable)
> +; Loop has very high loop count (1 million) and full unrolling was
> requested.
> +; Loop should not be unrolled.
> +;
> +; CHECK-LABEL: @unroll_1M(
> +; CHECK: store i32
> +; CHECK-NOT: store i32
> +; CHECK: br i1
> +define void @unroll_1M(i32* nocapture %a, i32 %b) {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds =
> %for.body, %entry
> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body
> ]
> +  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
> +  %0 = load i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, 1000000
> +  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12
> +
> +for.end:                                          ; preds =
> %for.body
> +  ret void
> +}
> +!12 = metadata !{metadata !12, metadata !4}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory



More information about the llvm-commits mailing list