[PATCH][polly] Support for generating vectors for loads with -1 stride
Tobias Grosser
tobias at grosser.es
Thu Mar 13 17:07:29 PDT 2014
On 03/10/2014 04:46 PM, chrisj at codeaurora.org wrote:
> Hi all,
>
> This patch enables vectorization of loops containing backward array
> traversal (array stride is -1).
Hi Chris,
this patch looks generally fine. Just some minor nits.
> 0001-Support-for-generating-vectors-for-loads-with-1-stri.patch
>
>
> From 1d8a62768085a53f5a4bff2c80ba9c8b5930460a Mon Sep 17 00:00:00 2001
> From: Chris Jenneisch<chrisj at codeaurora.org>
> Date: Tue, 4 Mar 2014 12:31:57 -0800
> Subject: [PATCH] Support for generating vectors for loads with -1 stride
>
> This patch enables vectorization of loops containing backward array
> traversal (array stride is -1).
> ---
> include/polly/CodeGen/BlockGenerators.h | 9 +++-
> lib/CodeGen/BlockGenerators.cpp | 27 +++++++++--
> test/Isl/CodeGen/simple_vec_stride_negative_one.ll | 49 ++++++++++++++++++++
> 3 files changed, 79 insertions(+), 6 deletions(-)
> create mode 100644 test/Isl/CodeGen/simple_vec_stride_negative_one.ll
>
> diff --git a/include/polly/CodeGen/BlockGenerators.h b/include/polly/CodeGen/BlockGenerators.h
> index 077236d..487521f 100644
> --- a/include/polly/CodeGen/BlockGenerators.h
> +++ b/include/polly/CodeGen/BlockGenerators.h
> @@ -265,7 +265,14 @@ private:
> /// %vector_ptr= bitcast double* %p to <4 x double>*
> /// %vec_full = load <4 x double>* %vector_ptr
> ///
> - Value *generateStrideOneLoad(const LoadInst *Load, ValueMapT &BBMap);
> + /// @param Direction This is used to indicate a -1 stride. In such
> + /// a case we load the end of a base address and
> + /// shuffle the accesses in reverse order into the
> + /// vector. By default we would do only positive
> + /// strides.
Maybe use 'bool NegativeStride' instead of Direction.
> + Value *generateStrideOneLoad(const LoadInst *Load,
> + VectorValueMapT &ScalarMaps, int Direction);
>
> /// @brief Load a vector initialized from a single scalar in memory
> ///
> diff --git a/lib/CodeGen/BlockGenerators.cpp b/lib/CodeGen/BlockGenerators.cpp
> index a5d1f0f..4ce8ca1 100644
> --- a/lib/CodeGen/BlockGenerators.cpp
> +++ b/lib/CodeGen/BlockGenerators.cpp
> @@ -440,11 +440,16 @@ Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
> }
>
> Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
> - ValueMapT &BBMap) {
> + VectorValueMapT &ScalarMaps,
> + int Direction = 0) {
> + unsigned VectorWidth = getVectorWidth();
> const Value *Pointer = Load->getPointerOperand();
> - Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
> - Value *NewPointer =
> - getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
> + Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
> + unsigned Offset = Direction ? VectorWidth - 1 : 0;
> +
> + Value *NewPointer = NULL;
> + NewPointer = getNewValue(Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
> + VLTS[Offset], getLoopForInst(Load));
> Value *VectorPtr =
> Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
> LoadInst *VecLoad =
> @@ -452,6 +457,16 @@ Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
> if (!Aligned)
> VecLoad->setAlignment(8);
>
> + if (Direction) {
> + SmallVector<Constant *, 16> Indices;
> + for (int i = VectorWidth - 1; i >= 0; i--)
> + Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
> + Constant *SV = llvm::ConstantVector::get(Indices);
> + Value *RevVecLoad = Builder.CreateShuffleVector(
> + VecLoad, VecLoad, SV, Load->getName() + "reverse");
> + return RevVecLoad;
> + }
> +
> return VecLoad;
> }
>
> @@ -516,7 +531,9 @@ void VectorBlockGenerator::generateLoad(const LoadInst *Load,
> if (Access.isStrideZero(isl_map_copy(Schedule)))
> NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
> else if (Access.isStrideOne(isl_map_copy(Schedule)))
> - NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
> + NewLoad = generateStrideOneLoad(Load, ScalarMaps);
> + else if (Access.isStrideX(isl_map_copy(Schedule), -1))
> + NewLoad = generateStrideOneLoad(Load, ScalarMaps, 1);
> else
> NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);
>
> diff --git a/test/Isl/CodeGen/simple_vec_stride_negative_one.ll b/test/Isl/CodeGen/simple_vec_stride_negative_one.ll
> new file mode 100644
> index 0000000..37a160e
> --- /dev/null
> +++ b/test/Isl/CodeGen/simple_vec_stride_negative_one.ll
> @@ -0,0 +1,49 @@
> +; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl -polly-codegen-scev %vector-opt -dce -polly -O3 -S < %s | FileCheck %s
%defaultOpts does not seem to be necessary.
Also, why are you running '-dce -polly -O3' at the end?
I would expect something like:
opt %loadPolly -polly-codegen-isl -polly-codegen-scev %vector-opt -S
Also, if the test case is not immediately vectorized, it may be
necessary to reduce the number of loop iterations to a smaller value
(e.g., 4).
> +; ModuleID = 'reverse.c'
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +;int A[100];
> +;void foo() {
> +; for (int i=999; i >= 0; i--)
> +; A[i]+=1;
> +;}
> +
> +
> +@A = common global [100 x i32] zeroinitializer, align 16
> +
> +; Function Attrs: nounwind uwtable
> +define void @foo() #0 {
> +entry:
> + br label %for.body
> +
> +for.body: ; preds = %entry, %for.body
> + %indvars.iv = phi i64 [ 999, %entry ], [ %indvars.iv.next, %for.body ]
> + %arrayidx = getelementptr inbounds [100 x i32]* @A, i64 0, i64 %indvars.iv
> + %0 = load i32* %arrayidx, align 4, !tbaa !1
> + %add = add nsw i32 %0, 1
> + store i32 %add, i32* %arrayidx, align 4, !tbaa !1
> + %indvars.iv.next = add nsw i64 %indvars.iv, -1
> + %1 = trunc i64 %indvars.iv to i32
> + %cmp = icmp sgt i32 %1, 0
> + br i1 %cmp, label %for.body, label %for.end
> +
> +for.end: ; preds = %for.body
> + ret void
> +}
> +
> +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
Can you remove unnecessary attributes?
> +!llvm.ident = !{!0}
> +
> +!0 = metadata !{metadata !"clang version 3.5.0 (http://llvm.org/git/clang.git b6b24026464c7c5053e1903a27f4dd1f95faf5c8) (http://llvm.org/git/llvm.git 824dfb1c5690ad1494572c95909c0d9dc27338c8)"}
> +!1 = metadata !{metadata !2, metadata !2, i64 0}
> +!2 = metadata !{metadata !"int", metadata !3, i64 0}
> +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
> +!4 = metadata !{metadata !"Simple C/C++ TBAA"}
Can you remove unnecessary metadata?
> +
> +; CHECK: @foo
> +; CHECK: [[LOAD:%[a-zA-Z0-9_]+]] = load <4 x i32>*
> +; CHECK: [[REVERSE_LOAD:%[a-zA-Z0-9_]+reverse]] = shufflevector <4 x i32> [[LOAD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
> -- 1.7.8.3
More information about the llvm-commits
mailing list