[polly] Reduction detection [V4]

Tobias Grosser tobias at grosser.es
Mon Jun 16 12:55:38 PDT 2014


On 16/06/2014 21:10, Johannes Doerfert wrote:
> Hey Tobias,
>
>
>
> Attached the next version of the detection patch, hopefully to your liking.
>
> I think I changed it the way we discussed, if not, let me know


Hi Johannes,

thanks for the update. The patch looks good. Two minimal things are left:

1) A typo (see inline comments)

2) An inconsistent comment.

Otherwise, this one LGTM. Please commit after addressing my last remarks.

> +  /// @brief Flag to indicate reduction like statements
> +  ///
> +  /// A statement is reduction like if it contains exactly one load and one
> +  /// store with exactly one binary operator in between. The binary operator
> +  /// needs to be associative and commutative. Compared to binary reductions "in
> +  /// the common sense" which load from and store to the same data location,
> +  /// reduction like statements do not impose any constraints on the location
> +  /// data is loaded from / stored to.
> +  /// like:
> +  ///
> +  ///    for i
> +  ///      for j
> +  ///        sum[i+j] = sum[i] + 3;
> +  ///
> +  /// Here not all iterations access the same memory location, but iterations
> +  /// for which j = 0 holds do. To take advantage of the reduction property,
> +  /// transformations do not only need check if a statement is reduction like,
> +  /// but they also need to verify that that the reduction property is only
> +  /// exploited for statement instances that load from and store to the same
> +  /// data location.
> +  bool IsReductionLike = false;

You added a check that the input and output pointer locations are 
identical (possibly due to me preferring this), but the comment here 
explains something different. This is inconsistent and needs updating.

I know that I proposed the comment, but just to show what kind of 
information may be useful to explain 'reduction-like' and specifically 
the property that load/store locations can be different. I proposed this 
comment as you seemed to strongly prefer to keep the two independent.

As you now decided to follow my advice to ensure that the two are 
identical we need to update the comment to be consistent.

>     //@}
>
>     /// The BasicBlock represented by this statement.
> @@ -292,6 +313,7 @@ class ScopStmt {
>     __isl_give isl_set *buildDomain(TempScop &tempScop, const Region &CurRegion);
>     void buildScattering(SmallVectorImpl<unsigned> &Scatter);
>     void buildAccesses(TempScop &tempScop, const Region &CurRegion);
> +  void checkForReduction();
>     //@}
>
>     /// Create the ScopStmt from a BasicBlock.
> @@ -303,6 +325,10 @@ class ScopStmt {
>
>   public:
>     ~ScopStmt();
> +
> +  /// @brief Return true iff this statement is a reduction like statement
> +  bool isReductionLike() const { return IsReductionLike; }
> +
>     /// @brief Get an isl_ctx pointer.
>     isl_ctx *getIslCtx() const;
>
> diff --git a/lib/Analysis/ScopInfo.cpp b/lib/Analysis/ScopInfo.cpp
> index c8d3e1d..766543d 100644
> --- a/lib/Analysis/ScopInfo.cpp
> +++ b/lib/Analysis/ScopInfo.cpp
> @@ -20,6 +20,7 @@
>   #include "polly/CodeGen/BlockGenerators.h"
>   #include "polly/LinkAllPasses.h"
>   #include "polly/ScopInfo.h"
> +#include "polly/Options.h"
>   #include "polly/Support/GICHelper.h"
>   #include "polly/Support/SCEVValidator.h"
>   #include "polly/Support/ScopHelper.h"
> @@ -30,7 +31,6 @@
>   #include "llvm/Analysis/LoopInfo.h"
>   #include "llvm/Analysis/RegionIterator.h"
>   #include "llvm/Analysis/ScalarEvolutionExpressions.h"
> -#include "llvm/Support/CommandLine.h"
>   #include "llvm/Support/Debug.h"
>
>   #include "isl/constraint.h"
> @@ -55,6 +55,15 @@ using namespace polly;
>   STATISTIC(ScopFound, "Number of valid Scops");
>   STATISTIC(RichScopFound, "Number of Scops containing a loop");
>
> +// Multiplicative reductions can be disabled seperately as these kind of

Type: separately

> +void ScopStmt::checkForReduction() {
> +  // Skip statements with more than one binary reduction
> +  if (MemAccs.size() != 2)
> +    return;
> +
> +  // Skip if there is not exactly one load and one store
> +  unsigned LoadIdx = MemAccs[0]->isRead() ? 0 : 1;
> +  auto *Load = dyn_cast<LoadInst>(MemAccs[LoadIdx]->getAccessInstruction());
> +  auto *Store =
> +      dyn_cast<StoreInst>(MemAccs[1 - LoadIdx]->getAccessInstruction());
> +  if (!Load || !Store ||
> +      Load->getPointerOperand() != Store->getPointerOperand())
> +    return;
> +
> +  // Skip if there is not one binary operator between the load and the store
> +  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
> +  if (!BinOp || (BinOp->getOperand(0) != Load && BinOp->getOperand(1) != Load))
> +    return;
> +
> +  // Skip if the opcode of the binary operator is not commutative/associative
> +  if (!BinOp->isCommutative() || !BinOp->isAssociative())
> +    return;
> +
> +  // Skip if the load has multiple uses
> +  if (Load->getNumUses() != 1)
> +    return;
> +
> +  // Skip if it is a multiplicative reduction and we disabled them
> +  if (DisableMultiplicativeReductions &&
> +      (BinOp->getOpcode() == Instruction::Mul ||
> +       BinOp->getOpcode() == Instruction::FMul))
> +    return;
> +
> +  // Valid reduction like statement
> +  IsReductionLike = true;
>   }
>
>   std::string ScopStmt::getDomainStr() const { return stringFromIslObj(Domain); }
> @@ -751,6 +798,7 @@ ScopStmt::~ScopStmt() {
>
>   void ScopStmt::print(raw_ostream &OS) const {
>     OS << "\t" << getBaseName() << "\n";
> +  OS.indent(8) << "Reduction like: " << isReductionLike() << "\n";
>
>     OS.indent(12) << "Domain :=\n";
>
> diff --git a/test/ScopInfo/reduction_only_reduction_like_access.ll b/test/ScopInfo/reduction_only_reduction_like_access.ll
> new file mode 100644
> index 0000000..c84d9c7
> --- /dev/null
> +++ b/test/ScopInfo/reduction_only_reduction_like_access.ll
> @@ -0,0 +1,36 @@
> +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=DETECTION
> +;
> +; DETECTION: Reduction like: 1
> +;
> +; void f(int *sum) {
> +;   int i;
> +;   for (i = 0; i < 100; i++)
> +;     sum[i] = sum[99-i] + i;
> +; }
> +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
> +
> +define void @f(i32* %sum) {
> +entry:
> +  br label %for.cond
> +
> +for.cond:                                         ; preds = %for.inc, %entry
> +  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
> +  %exitcond = icmp ne i32 %i.0, 100
> +  br i1 %exitcond, label %for.body, label %for.end
> +
> +for.body:                                         ; preds = %for.cond
> +  %sub = sub nsw i32 99, %i.0
> +  %arrayidx = getelementptr inbounds i32* %sum, i32 %sub
> +  %tmp = load i32* %arrayidx, align 4
> +  %add = add nsw i32 %tmp, %i.0
> +  %arrayidx1 = getelementptr inbounds i32* %sum, i32 %i.0
> +  store i32 %add, i32* %arrayidx1, align 4
> +  br label %for.inc
> +
> +for.inc:                                          ; preds = %for.body
> +  %inc = add nsw i32 %i.0, 1
> +  br label %for.cond
> +
> +for.end:                                          ; preds = %for.cond
> +  ret void
> +}
> diff --git a/test/ScopInfo/reduction_simple_fp.ll b/test/ScopInfo/reduction_simple_fp.ll
> new file mode 100644
> index 0000000..0763a98
> --- /dev/null
> +++ b/test/ScopInfo/reduction_simple_fp.ll
> @@ -0,0 +1,65 @@
> +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=DETECTION
> +;
> +; DETECTION: Function: f_no_fast_math
> +; DETECTION: Reduction like: 0
> +; DETECTION: Function: f_fast_math
> +; DETECTION: Reduction like: 1
> +;
> +; void f(float *sum) {
> +;   int i;
> +;   for (i = 0; i < 100; i++)
> +;     *sum += 3.41 * i;
> +; }
> +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
> +
> +define void @f_no_fast_math(float* %sum) {
> +entry:
> +  br label %for.cond
> +
> +for.cond:                                         ; preds = %for.inc, %entry
> +  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
> +  %exitcond = icmp ne i32 %i.0, 100
> +  br i1 %exitcond, label %for.body, label %for.end
> +
> +for.body:                                         ; preds = %for.cond
> +  %conv = sitofp i32 %i.0 to float
> +  %pi = fptrunc double 3.41 to float
> +  %mul = fmul float %conv, %pi
> +  %tmp = load float* %sum, align 4
> +  %add = fadd float %tmp, %mul
> +  store float %add, float* %sum, align 4
> +  br label %for.inc
> +
> +for.inc:                                          ; preds = %for.body
> +  %inc = add nsw i32 %i.0, 1
> +  br label %for.cond
> +
> +for.end:                                          ; preds = %for.cond
> +  ret void
> +}
> +
> +define void @f_fast_math(float* %sum) {
> +entry:
> +  br label %for.cond
> +
> +for.cond:                                         ; preds = %for.inc, %entry
> +  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
> +  %exitcond = icmp ne i32 %i.0, 100
> +  br i1 %exitcond, label %for.body, label %for.end
> +
> +for.body:                                         ; preds = %for.cond
> +  %conv = sitofp i32 %i.0 to float
> +  %pi = fptrunc double 3.41 to float
> +  %mul = fmul fast float %conv, %pi
> +  %tmp = load float* %sum, align 4
> +  %add = fadd fast float %tmp, %mul
> +  store float %add, float* %sum, align 4
> +  br label %for.inc
> +
> +for.inc:                                          ; preds = %for.body
> +  %inc = add nsw i32 %i.0, 1
> +  br label %for.cond
> +
> +for.end:                                          ; preds = %for.cond
> +  ret void
> +}
> diff --git a/test/ScopInfo/reduction_simple_w_constant.ll b/test/ScopInfo/reduction_simple_w_constant.ll
> new file mode 100644
> index 0000000..df93940
> --- /dev/null
> +++ b/test/ScopInfo/reduction_simple_w_constant.ll
> @@ -0,0 +1,33 @@
> +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=DETECTION
> +;
> +; DETECTION: Reduction like: 1
> +;
> +; void f(int *sum) {
> +;   int i;

Remove this line.

> +;   for (int i = 0; i <= 100; i++)
> +;     sum += 3;
> +; }
> +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
> +
> +define void @f(i32* %sum) {
> +entry:
> +  br label %entry.split1
> +
> +entry.split1:                                     ; preds = %entry
> +  br label %entry.split
> +
> +entry.split:                                      ; preds = %entry.split1
> +  br label %for.cond
> +
> +for.cond:                                         ; preds = %for.cond, %entry.split
> +  %i1.0 = phi i32 [ 0, %entry.split ], [ %inc, %for.cond ]
> +  %sum.reload = load i32* %sum
> +  %add = add nsw i32 %sum.reload, 3
> +  %inc = add nsw i32 %i1.0, 1
> +  store i32 %add, i32* %sum
> +  %cmp = icmp slt i32 %i1.0, 100
> +  br i1 %cmp, label %for.cond, label %for.end
> +
> +for.end:                                          ; preds = %for.cond
> +  ret void
> +}
> diff --git a/test/ScopInfo/reduction_simple_w_iv.ll b/test/ScopInfo/reduction_simple_w_iv.ll
> new file mode 100644
> index 0000000..532f647
> --- /dev/null
> +++ b/test/ScopInfo/reduction_simple_w_iv.ll
> @@ -0,0 +1,33 @@
> +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=DETECTION
> +;
> +; DETECTION: Reduction like: 1
> +;
> +; void f(int* sum) {
> +;   for (int i = 0; i <= 100; i++)
> +;     sum += i * 3;
> +; }
> +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
> +
> +define void @f(i32* %sum) {
> +entry:
> +  br label %entry.split1
> +
> +entry.split1:                                     ; preds = %entry
> +  br label %entry.split
> +
> +entry.split:                                      ; preds = %entry.split1
> +  br label %for.cond
> +
> +for.cond:                                         ; preds = %for.cond, %entry.split
> +  %i1.0 = phi i32 [ 0, %entry.split ], [ %inc, %for.cond ]
> +  %sum.reload = load i32* %sum
> +  %mul = mul nsw i32 %i1.0, 3
> +  %add = add nsw i32 %sum.reload, %mul
> +  %inc = add nsw i32 %i1.0, 1
> +  store i32 %add, i32* %sum
> +  %cmp = icmp slt i32 %i1.0, 100
> +  br i1 %cmp, label %for.cond, label %for.end
> +
> +for.end:                                          ; preds = %for.cond
> +  ret void
> +}




More information about the llvm-commits mailing list