Add 'aggressive' keyword for pragma loop hints

Mon Jun 8 11:20:36 PDT 2015

On Fri, Jun 5, 2015 at 7:41 PM, Tyler Nowicki <tnowicki at apple.com> wrote:
> Hi,
>
> This patch (0003) adds support for the keyword aggressive to be used with pragma loop hints. Specifying aggressive for vectorize or interleave will add mem.parallel_loop_access metadata to loads/stores within the loop. This uses the LoopInfo InsertHelper provided for OpenMP.
>
> The new pragma syntax:
>
> #pragma clang loop vectorize(aggressive) interleave(aggressive)
>
> The metadata vectorize.enable will be modified in a follow-up patch to an integer. 0 - disable, 1 - enable, 2 - aggressive. This will be combined with an LLVM patch to identify the aggressive option and override the cost model, and potentially other restrictions on vectorization/interleaving.
>
> Patch 0001 moves some pragma loop hints to the correct folder and 0002 fixes a small diagnostic bug.

Patch 0001 and 0002 LGTM. A few comments on 0003 while you're mulling
over Hal's feedback.

> From c11ab58e4f1937be7d0b261294c0afa2cb3d5b0f Mon Sep 17 00:00:00 2001
> From: Tyler Nowicki <tnowicki at apple.com>
> Date: Fri, 5 Jun 2015 13:23:24 -0700
> Subject: [PATCH 3/3] Add aggressive option for pragma loop vectorize and
>  interleave.
>
> ---
>  include/clang/Basic/Attr.td                 |  6 ++--
>  include/clang/Basic/DiagnosticParseKinds.td |  4 +--
>  lib/CodeGen/CGLoopInfo.cpp                  | 33 +++++++++++++++++--
>  lib/CodeGen/CGLoopInfo.h                    |  6 +++-
>  lib/CodeGen/CGStmt.cpp                      |  8 ++---
>  lib/Parse/ParsePragma.cpp                   |  4 ++-
>  lib/Sema/SemaStmtAttr.cpp                   |  4 ++-
>  test/CodeGenCXX/pragma-loop-aggressive.cpp  | 49 +++++++++++++++++++++++++++++
>  test/Parser/pragma-loop-aggressive.cpp      | 34 ++++++++++++++++++++
>  test/Parser/pragma-loop.cpp                 |  8 ++---
>  10 files changed, 138 insertions(+), 18 deletions(-)
>  create mode 100644 test/CodeGenCXX/pragma-loop-aggressive.cpp
>  create mode 100644 test/Parser/pragma-loop-aggressive.cpp
>
> diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td
> index c310d25..3bd7aff 100644
> --- a/include/clang/Basic/Attr.td
> +++ b/include/clang/Basic/Attr.td
> @@ -1935,8 +1935,8 @@ def LoopHint : Attr {
>                            ["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount",
>                             "Unroll", "UnrollCount"]>,
>                EnumArgument<"State", "LoopHintState",
> -                           ["default", "enable", "disable"],
> -                           ["Default", "Enable", "Disable"]>,
> +                           ["default", "aggressive", "enable", "disable"],
> +                           ["Default", "Aggressive", "Enable", "Disable"]>,

Entirely uncertain whether we care about this or not (we probably
don't), but this will mess up serialized data because it shifts the
option values around.

>                ExprArgument<"Value">];
>
>    let AdditionalMembers = [{
> @@ -1982,6 +1982,8 @@ def LoopHint : Attr {
>        return "";
>      else if (state == Enable)
>        OS << (option == Unroll ? "full" : "enable");
> +    else if (state == Aggressive)
> +      OS << "aggressive";
>      else
>        OS << "disable";
>      OS << ")";
> diff --git a/include/clang/Basic/DiagnosticParseKinds.td b/include/clang/Basic/DiagnosticParseKinds.td
> index f00a3b3..c807592 100644
> --- a/include/clang/Basic/DiagnosticParseKinds.td
> +++ b/include/clang/Basic/DiagnosticParseKinds.td
> @@ -989,12 +989,12 @@ def err_omp_expected_identifier_for_critical : Error<
>  // Pragma loop support.
>  def err_pragma_loop_missing_argument : Error<
>    "missing argument; expected %select{an integer value|"
> -  "'%select{enable|full}1' or 'disable'}0">;
> +  "'%select{enable', 'aggressive|full}1' or 'disable'}0">;

The single quotes look strange here. I think in the case of selecting
enable, you will get two close single quotes, and in the case of
aggressive, you will get two open single quotes. I'm surprised a test
doesn't catch this, which suggests I may simply be wrong. ;-)

>  def err_pragma_loop_invalid_option : Error<
>    "%select{invalid|missing}0 option%select{ %1|}0; expected vectorize, "
>    "vectorize_width, interleave, interleave_count, unroll, or unroll_count">;
>  def err_pragma_invalid_keyword : Error<
> -  "invalid argument; expected '%select{enable|full}0' or 'disable'">;
> +  "invalid argument; expected '%select{enable', 'aggressive|full}0' or 'disable'">;

Same problem here as above.

>
>  // Pragma unroll support.
>  def warn_pragma_unroll_cuda_value_in_parens : Warning<
> diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp
> index 011ae7e..c285092 100644
> --- a/lib/CodeGen/CGLoopInfo.cpp
> +++ b/lib/CodeGen/CGLoopInfo.cpp
> @@ -8,13 +8,14 @@
>  //===----------------------------------------------------------------------===//
>
>  #include "CGLoopInfo.h"
> +#include "clang/AST/Attr.h"
> +#include "clang/Sema/LoopHint.h"
>  #include "llvm/IR/BasicBlock.h"
>  #include "llvm/IR/Constants.h"
>  #include "llvm/IR/InstrTypes.h"
>  #include "llvm/IR/Instructions.h"
>  #include "llvm/IR/Metadata.h"
> -using namespace clang;
> -using namespace CodeGen;
> +using namespace clang::CodeGen;

While I prefer this change, it does seem like a drive by that should
be in a separate commit.

>  using namespace llvm;
>
>  static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) {
> @@ -77,7 +78,33 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs)
>    LoopID = createMetadata(Header->getContext(), Attrs);
>  }
>
> -void LoopInfoStack::push(BasicBlock *Header) {
> +void LoopInfoStack::push(BasicBlock *Header, ArrayRef<const Attr *> Attrs) {
> +  for (const auto *Attr : Attrs) {
> +    const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr);
> +
> +    // Skip non loop hint attributes
> +    if (!LH)
> +      continue;
> +
> +    LoopHintAttr::OptionType Option = LH->getOption();
> +    LoopHintAttr::LoopHintState State = LH->getState();
> +    switch (Option) {
> +    case LoopHintAttr::Vectorize:
> +    case LoopHintAttr::Interleave:
> +      if (State == LoopHintAttr::Aggressive) {
> +        // Apply "llvm.mem.parallel_loop_access" metadata to load/stores.
> +        setParallel(true);
> +      }
> +      break;
> +    case LoopHintAttr::VectorizeWidth:
> +    case LoopHintAttr::InterleaveCount:
> +    case LoopHintAttr::Unroll:
> +    case LoopHintAttr::UnrollCount:
> +      // Nothing to do here for these loop hints.
> +      break;
> +    }
> +  }
> +
>    Active.push_back(LoopInfo(Header, StagedAttrs));
>    // Clear the attributes so nested loops do not inherit them.
>    StagedAttrs.clear();
> diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h
> index aee1621..ab61cd1 100644
> --- a/lib/CodeGen/CGLoopInfo.h
> +++ b/lib/CodeGen/CGLoopInfo.h
> @@ -15,6 +15,7 @@
>  #ifndef LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H
>  #define LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H
>
> +#include "llvm/ADT/ArrayRef.h"
>  #include "llvm/ADT/DenseMap.h"
>  #include "llvm/ADT/SmallVector.h"
>  #include "llvm/IR/Value.h"
> @@ -27,6 +28,7 @@ class MDNode;
>  } // end namespace llvm
>
>  namespace clang {
> +class Attr;
>  namespace CodeGen {
>
>  /// \brief Attributes that may be specified on loops.
> @@ -86,7 +88,9 @@ public:
>
>    /// \brief Begin a new structured loop. The set of staged attributes will be
>    /// applied to the loop and then cleared.
> -  void push(llvm::BasicBlock *Header);
> +  void
> +  push(llvm::BasicBlock *Header,
> +       llvm::ArrayRef<const Attr *> Attrs = llvm::ArrayRef<const Attr *>());

Should use None here.

>
>    /// \brief End the current loop.
>    void pop();
> diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
> index c879750..8cfabb6 100644
> --- a/lib/CodeGen/CGStmt.cpp
> +++ b/lib/CodeGen/CGStmt.cpp
> @@ -682,7 +682,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
>    JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
>    EmitBlock(LoopHeader.getBlock());
>
> -  LoopStack.push(LoopHeader.getBlock());
> +  LoopStack.push(LoopHeader.getBlock(), WhileAttrs);
>
>    // Create an exit block for when the condition fails, which will
>    // also become the break target.
> @@ -776,7 +776,7 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S,
>    // Emit the body of the loop.
>    llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
>
> -  LoopStack.push(LoopBody);
> +  LoopStack.push(LoopBody, DoAttrs);
>
>    EmitBlockWithFallThrough(LoopBody, &S);
>    {
> @@ -842,7 +842,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S,
>    llvm::BasicBlock *CondBlock = Continue.getBlock();
>    EmitBlock(CondBlock);
>
> -  LoopStack.push(CondBlock);
> +  LoopStack.push(CondBlock, ForAttrs);
>
>    // If the for loop doesn't have an increment we can just use the
>    // condition as the continue block.  Otherwise we'll need to create
> @@ -940,7 +940,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
>    llvm::BasicBlock *CondBlock = createBasicBlock("for.cond");
>    EmitBlock(CondBlock);
>
> -  LoopStack.push(CondBlock);
> +  LoopStack.push(CondBlock, ForAttrs);
>
>    // If there are any cleanups between here and the loop-exit scope,
>    // create a block to stage a loop exit along.
> diff --git a/lib/Parse/ParsePragma.cpp b/lib/Parse/ParsePragma.cpp
> index 84256df..7acf4a2 100644
> --- a/lib/Parse/ParsePragma.cpp
> +++ b/lib/Parse/ParsePragma.cpp
> @@ -824,7 +824,8 @@ bool Parser::HandlePragmaLoopHint(LoopHint &Hint) {
>      SourceLocation StateLoc = Toks[0].getLocation();
>      IdentifierInfo *StateInfo = Toks[0].getIdentifierInfo();
>      if (!StateInfo || ((OptionUnroll ? !StateInfo->isStr("full")
> -                                     : !StateInfo->isStr("enable")) &&
> +                                     : !StateInfo->isStr("enable") &&
> +                                           !StateInfo->isStr("aggressive")) &&

Indentation is a bit strange here, did clang-format do this?

>                         !StateInfo->isStr("disable"))) {
>        Diag(Toks[0].getLocation(), diag::err_pragma_invalid_keyword)
>            << /*FullKeyword=*/OptionUnroll;
> @@ -1954,6 +1955,7 @@ static bool ParseLoopHintValue(Preprocessor &PP, Token &Tok, Token PragmaName,
>  ///  loop-hint-keyword:
>  ///    'enable'
>  ///    'disable'
> +///    'aggressive'
>  ///
>  ///  unroll-hint-keyword:
>  ///    'full'
> diff --git a/lib/Sema/SemaStmtAttr.cpp b/lib/Sema/SemaStmtAttr.cpp
> index 19e2c8e..4fe4796 100644
> --- a/lib/Sema/SemaStmtAttr.cpp
> +++ b/lib/Sema/SemaStmtAttr.cpp
> @@ -105,6 +105,8 @@ static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const AttributeList &A,
>      if (StateLoc && StateLoc->Ident) {
>        if (StateLoc->Ident->isStr("disable"))
>          State = LoopHintAttr::Disable;
> +      else if (StateLoc->Ident->isStr("aggressive"))
> +        State = LoopHintAttr::Aggressive;
>        else
>          State = LoopHintAttr::Enable;
>      }
> @@ -159,7 +161,7 @@ CheckForIncompatibleAttributes(Sema &S,
>      const LoopHintAttr *PrevAttr;
>      if (Option == LoopHintAttr::Vectorize ||
>          Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll) {
> -      // Enable|disable hint.  For example, vectorize(enable).
> +      // Enable|Aggressive|disable hint.  For example, vectorize(enable).
>        PrevAttr = CategoryState.StateAttr;
>        CategoryState.StateAttr = LH;
>      } else {
> diff --git a/test/CodeGenCXX/pragma-loop-aggressive.cpp b/test/CodeGenCXX/pragma-loop-aggressive.cpp
> new file mode 100644
> index 0000000..32576b2
> --- /dev/null
> +++ b/test/CodeGenCXX/pragma-loop-aggressive.cpp
> @@ -0,0 +1,49 @@
> +// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -emit-llvm -o - %s | FileCheck %s
> +
> +// Verify aggressive vectorization is recognized.
> +void vectorize_test(int *List, int Length) {
> +// CHECK: define {{.*}} @_Z14vectorize_testPii
> +// CHECK: [[LOAD1_IV:.+]] = load i32, i32* [[IV1:[^,]+]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID:[0-9]+]]
> +// CHECK-NEXT: [[LOAD1_LEN:.+]] = load i32, i32* [[LEN1:.+]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
> +// CHECK-NEXT: [[CMP1:.+]] = icmp slt i32[[LOAD1_IV]],[[LOAD1_LEN]]
> +// CHECK-NEXT: br i1[[CMP1]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]], !llvm.loop ![[LOOP1_HINTS:[0-9]+]]
> +#pragma clang loop vectorize(aggressive)
> +  for (int i = 0; i < Length; i++) {
> +    // CHECK: [[LOOP1_BODY]]
> +    // CHECK-NEXT: [[RHIV1:.+]] = load i32, i32* [[IV1]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
> +    // CHECK-NEXT: [[CALC1:.+]] = mul nsw i32[[RHIV1]], 2
> +    // CHECK-NEXT: [[SIV1:.+]] = load i32, i32* [[IV1]]{{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
> +    // CHECK-NEXT: [[INDEX1:.+]] = sext i32[[SIV1]] to i64
> +    // CHECK-NEXT: [[ARRAY1:.+]] = load i32*, i32** [[LIST1:.*]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
> +    // CHECK-NEXT: [[PTR1:.+]] = getelementptr inbounds i32, i32*[[ARRAY1]], i64[[INDEX1]]
> +    // CHECK-NEXT: store i32[[CALC1]], i32*[[PTR1]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP1_ID]]
> +    List[i] = i * 2;
> +  }
> +  // CHECK: [[LOOP1_END]]
> +}
> +
> +// Verify aggressive interleaving is recognized.
> +void interleave_test(int *List, int Length) {
> +// CHECK: define {{.*}} @_Z15interleave_testPii
> +// CHECK: [[LOAD2_IV:.+]] = load i32, i32* [[IV2:[^,]+]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID:[0-9]+]]
> +// CHECK-NEXT: [[LOAD2_LEN:.+]] = load i32, i32* [[LEN2:.+]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
> +// CHECK-NEXT: [[CMP2:.+]] = icmp slt i32[[LOAD2_IV]],[[LOAD2_LEN]]
> +// CHECK-NEXT: br i1[[CMP2]], label %[[LOOP2_BODY:[^,]+]], label %[[LOOP2_END:[^,]+]], !llvm.loop ![[LOOP2_HINTS:[0-9]+]]
> +#pragma clang loop interleave(aggressive)
> +  for (int i = 0; i < Length; i++) {
> +    // CHECK: [[LOOP2_BODY]]
> +    // CHECK-NEXT: [[RHIV2:.+]] = load i32, i32* [[IV2]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
> +    // CHECK-NEXT: [[CALC2:.+]] = mul nsw i32[[RHIV2]], 2
> +    // CHECK-NEXT: [[SIV2:.+]] = load i32, i32* [[IV2]]{{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
> +    // CHECK-NEXT: [[INDEX2:.+]] = sext i32[[SIV2]] to i64
> +    // CHECK-NEXT: [[ARRAY2:.+]] = load i32*, i32** [[LIST2:.*]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
> +    // CHECK-NEXT: [[PTR2:.+]] = getelementptr inbounds i32, i32*[[ARRAY2]], i64[[INDEX2]]
> +    // CHECK-NEXT: store i32[[CALC2]], i32*[[PTR2]], {{.*}}!llvm.mem.parallel_loop_access ![[LOOP2_ID]]
> +    List[i] = i * 2;
> +  }
> +  // CHECK: [[LOOP2_END]]
> +}
> +
> +// CHECK: ![[LOOP1_HINTS]] = distinct !{![[LOOP1_HINTS]], ![[INTENABLE_1:.*]]}
> +// CHCCK: ![[INTENABLE_1]] = !{!"llvm.loop.vectorize.enable", i1 true}
> +// CHECK: ![[LOOP2_HINTS]] = distinct !{![[LOOP2_HINTS]], ![[INTENABLE_1:.*]]}
> diff --git a/test/Parser/pragma-loop-aggressive.cpp b/test/Parser/pragma-loop-aggressive.cpp
> new file mode 100644
> index 0000000..da7b4c8
> --- /dev/null
> +++ b/test/Parser/pragma-loop-aggressive.cpp
> @@ -0,0 +1,34 @@
> +// RUN: %clang_cc1 -std=c++11 -verify %s
> +
> +// Note that this puts the expected lines before the directives to work around
> +// limitations in the -verify mode.
> +
> +void test(int *List, int Length) {
> +  int i = 0;
> +
> +#pragma clang loop vectorize(aggressive)
> +#pragma clang loop interleave(aggressive)
> +  while (i + 1 < Length) {
> +    List[i] = i;
> +  }
> +
> +/* expected-error {{expected ')'}} */ #pragma clang loop vectorize(aggressive
> +/* expected-error {{expected ')'}} */ #pragma clang loop interleave(aggressive
> +
> +/* expected-error {{invalid argument; expected 'full' or 'disable'}} */ #pragma clang loop unroll(aggressive)
> +
> +/* expected-error {{invalid argument; expected 'enable', 'aggressive' or 'disable'}} */ #pragma clang loop vectorize(badidentifier)
> +/* expected-error {{invalid argument; expected 'enable', 'aggressive' or 'disable'}} */ #pragma clang loop interleave(badidentifier)
> +/* expected-error {{invalid argument; expected 'full' or 'disable'}} */ #pragma clang loop unroll(badidentifier)
> +  while (i-7 < Length) {
> +    List[i] = i;
> +  }
> +
> +/* expected-error {{duplicate directives 'vectorize(aggressive)' and 'vectorize(enable)'}} */ #pragma clang loop vectorize(enable)
> +#pragma clang loop vectorize(aggressive)
> +/* expected-error {{duplicate directives 'interleave(aggressive)' and 'interleave(enable)'}} */ #pragma clang loop interleave(enable)
> +#pragma clang loop interleave(aggressive)
> +  while (i-9 < Length) {
> +    List[i] = i;
> +  }
> +}
> diff --git a/test/Parser/pragma-loop.cpp b/test/Parser/pragma-loop.cpp
> index a0213ac..3057b69 100644
> --- a/test/Parser/pragma-loop.cpp
> +++ b/test/Parser/pragma-loop.cpp
> @@ -130,7 +130,7 @@ void test(int *List, int Length) {
>  /* expected-error {{expected ')'}} */ #pragma clang loop interleave_count(4
>  /* expected-error {{expected ')'}} */ #pragma clang loop unroll_count(4
>
> -/* expected-error {{missing argument; expected 'enable' or 'disable'}} */ #pragma clang loop vectorize()
> +/* expected-error {{missing argument; expected 'enable', 'aggressive' or 'disable'}} */ #pragma clang loop vectorize()
>  /* expected-error {{missing argument; expected an integer value}} */ #pragma clang loop interleave_count()
>  /* expected-error {{missing argument; expected 'full' or 'disable'}} */ #pragma clang loop unroll()
>
> @@ -184,8 +184,8 @@ const int VV = 4;
>      List[i] = i;
>    }
>
> -/* expected-error {{invalid argument; expected 'enable' or 'disable'}} */ #pragma clang loop vectorize(badidentifier)
> -/* expected-error {{invalid argument; expected 'enable' or 'disable'}} */ #pragma clang loop interleave(badidentifier)
> +/* expected-error {{invalid argument; expected 'enable', 'aggressive' or 'disable'}} */ #pragma clang loop vectorize(badidentifier)
> +/* expected-error {{invalid argument; expected 'enable', 'aggressive' or 'disable'}} */ #pragma clang loop interleave(badidentifier)
>  /* expected-error {{invalid argument; expected 'full' or 'disable'}} */ #pragma clang loop unroll(badidentifier)
>    while (i-7 < Length) {
>      List[i] = i;
> @@ -194,7 +194,7 @@ const int VV = 4;
>  // PR20069 - Loop pragma arguments that are not identifiers or numeric
>  // constants crash FE.
>  /* expected-error {{expected ')'}} */ #pragma clang loop vectorize(()
> -/* expected-error {{invalid argument; expected 'enable' or 'disable'}} */ #pragma clang loop interleave(*)
> +/* expected-error {{invalid argument; expected 'enable', 'aggressive' or 'disable'}} */ #pragma clang loop interleave(*)
>  /* expected-error {{invalid argument; expected 'full' or 'disable'}} */ #pragma clang loop unroll(=)
>  /* expected-error {{type name requires a specifier or qualifier}} expected-error {{expected expression}} */ #pragma clang loop vectorize_width(^)
>  /* expected-error {{expected expression}} expected-error {{expected expression}} */ #pragma clang loop interleave_count(/)
> --
> 2.3.2 (Apple Git-55)
>
>

~Aaron