[PATCH][polly] Support for generating vectors for loads with -1 stride

Chris Jenneisch chrisj at codeaurora.org
Tue Mar 18 17:45:18 PDT 2014


Hi Tobi,

Thanks for the comments. I have attached the updated patch.

Chris

> On 03/10/2014 04:46 PM, chrisj at codeaurora.org wrote:
>> Hi all,
>>
>> This patch enables vectorization of loops containing backward array
>> traversal (array stride is -1).
>
> Hi Chris,
>
> this patch looks generally fine. Just some minor nits.
>
>
>> 0001-Support-for-generating-vectors-for-loads-with-1-stri.patch
>>
>>
>>  From 1d8a62768085a53f5a4bff2c80ba9c8b5930460a Mon Sep 17 00:00:00 2001
>> From: Chris Jenneisch<chrisj at codeaurora.org>
>> Date: Tue, 4 Mar 2014 12:31:57 -0800
>> Subject: [PATCH] Support for generating vectors for loads with -1 stride
>>
>> This patch enables vectorization of loops containing backward array
>> traversal (array stride is -1).
>> ---
>>   include/polly/CodeGen/BlockGenerators.h            |    9 +++-
>>   lib/CodeGen/BlockGenerators.cpp                    |   27 +++++++++--
>>   test/Isl/CodeGen/simple_vec_stride_negative_one.ll |   49
>> ++++++++++++++++++++
>>   3 files changed, 79 insertions(+), 6 deletions(-)
>>   create mode 100644 test/Isl/CodeGen/simple_vec_stride_negative_one.ll
>>
>> diff --git a/include/polly/CodeGen/BlockGenerators.h
>> b/include/polly/CodeGen/BlockGenerators.h
>> index 077236d..487521f 100644
>> --- a/include/polly/CodeGen/BlockGenerators.h
>> +++ b/include/polly/CodeGen/BlockGenerators.h
>> @@ -265,7 +265,14 @@ private:
>>     /// %vector_ptr= bitcast double* %p to <4 x double>*
>>     /// %vec_full = load <4 x double>* %vector_ptr
>>     ///
>> -  Value *generateStrideOneLoad(const LoadInst *Load, ValueMapT &BBMap);
>> +  /// @param Direction This is used to indicate a -1 stride. In such
>> +  ///                  a case we load the end of a base address and
>> +  ///                  shuffle the accesses in reverse order into the
>> +  ///                  vector. By default we would do only positive
>> +  ///                  strides.
>
> Maybe use 'bool NegativeStride' instead of Direction.
>
>> +  Value *generateStrideOneLoad(const LoadInst *Load,
>> +                               VectorValueMapT &ScalarMaps, int
>> Direction);
>>
>>     /// @brief Load a vector initialized from a single scalar in memory
>>     ///
>> diff --git a/lib/CodeGen/BlockGenerators.cpp
>> b/lib/CodeGen/BlockGenerators.cpp
>> index a5d1f0f..4ce8ca1 100644
>> --- a/lib/CodeGen/BlockGenerators.cpp
>> +++ b/lib/CodeGen/BlockGenerators.cpp
>> @@ -440,11 +440,16 @@ Type *VectorBlockGenerator::getVectorPtrTy(const
>> Value *Val, int Width) {
>>   }
>>
>>   Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst
>> *Load,
>> -                                                   ValueMapT &BBMap) {
>> +                                                   VectorValueMapT
>> &ScalarMaps,
>> +                                                   int Direction = 0) {
>> +  unsigned VectorWidth = getVectorWidth();
>>     const Value *Pointer = Load->getPointerOperand();
>> -  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
>> -  Value *NewPointer =
>> -      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0],
>> getLoopForInst(Load));
>> +  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
>> +  unsigned Offset = Direction ? VectorWidth - 1 : 0;
>> +
>> +  Value *NewPointer = NULL;
>> +  NewPointer = getNewValue(Pointer, ScalarMaps[Offset],
>> GlobalMaps[Offset],
>> +                           VLTS[Offset], getLoopForInst(Load));
>>     Value *VectorPtr =
>>         Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
>>     LoadInst *VecLoad =
>> @@ -452,6 +457,16 @@ Value
>> *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
>>     if (!Aligned)
>>       VecLoad->setAlignment(8);
>>
>> +  if (Direction) {
>> +    SmallVector<Constant *, 16> Indices;
>> +    for (int i = VectorWidth - 1; i >= 0; i--)
>> +      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
>> +    Constant *SV = llvm::ConstantVector::get(Indices);
>> +    Value *RevVecLoad = Builder.CreateShuffleVector(
>> +        VecLoad, VecLoad, SV, Load->getName() + "reverse");
>> +    return RevVecLoad;
>> +  }
>> +
>>     return VecLoad;
>>   }
>>
>> @@ -516,7 +531,9 @@ void VectorBlockGenerator::generateLoad(const
>> LoadInst *Load,
>>     if (Access.isStrideZero(isl_map_copy(Schedule)))
>>       NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
>>     else if (Access.isStrideOne(isl_map_copy(Schedule)))
>> -    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
>> +    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
>> +  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
>> +    NewLoad = generateStrideOneLoad(Load, ScalarMaps, 1);
>>     else
>>       NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);
>>
>> diff --git a/test/Isl/CodeGen/simple_vec_stride_negative_one.ll
>> b/test/Isl/CodeGen/simple_vec_stride_negative_one.ll
>> new file mode 100644
>> index 0000000..37a160e
>> --- /dev/null
>> +++ b/test/Isl/CodeGen/simple_vec_stride_negative_one.ll
>> @@ -0,0 +1,49 @@
>> +; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl
>> -polly-codegen-scev %vector-opt -dce -polly -O3 -S < %s | FileCheck %s
>
> %defaultOpts does not seem to be necessary.
>
> Also, why are you running '-dce -polly -O3' at the end?
>
> I would expect something like:
>
> opt %loadPolly -polly-codegen-isl -polly-codegen-scev %vector-opt -S
>
> Also, if the test case is not immediately vectorized, it may be
> necessary to reduce the number of loop iterations to a smaller number
> (e.g., 4).
>
>> +; ModuleID = 'reverse.c'
>> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
>> +target triple = "x86_64-unknown-linux-gnu"
>> +
>> +;int A[100];
>> +;void foo() {
>> +;  for (int i=999; i >= 0; i--)
>> +;    A[i]+=1;
>> +;}
>> +
>> +
>> +@A = common global [100 x i32] zeroinitializer, align 16
>> +
>> +; Function Attrs: nounwind uwtable
>> +define void @foo() #0 {
>> +entry:
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %entry,
>> %for.body
>> +  %indvars.iv = phi i64 [ 999, %entry ], [ %indvars.iv.next, %for.body
>> ]
>> +  %arrayidx = getelementptr inbounds [100 x i32]* @A, i64 0, i64
>> %indvars.iv
>> +  %0 = load i32* %arrayidx, align 4, !tbaa !1
>> +  %add = add nsw i32 %0, 1
>> +  store i32 %add, i32* %arrayidx, align 4, !tbaa !1
>> +  %indvars.iv.next = add nsw i64 %indvars.iv, -1
>> +  %1 = trunc i64 %indvars.iv to i32
>> +  %cmp = icmp sgt i32 %1, 0
>> +  br i1 %cmp, label %for.body, label %for.end
>> +
>> +for.end:                                          ; preds = %for.body
>> +  ret void
>> +}
>> +
>> +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false"
>> "no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
>> "no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
>> "unsafe-fp-math"="false" "use-soft-float"="false" }
>
> Can you remove unnecessary attributes?
>
>> +!llvm.ident = !{!0}
>> +
>> +!0 = metadata !{metadata !"clang version 3.5.0
>> (http://llvm.org/git/clang.git
>> b6b24026464c7c5053e1903a27f4dd1f95faf5c8) (http://llvm.org/git/llvm.git
>> 824dfb1c5690ad1494572c95909c0d9dc27338c8)"}
>> +!1 = metadata !{metadata !2, metadata !2, i64 0}
>> +!2 = metadata !{metadata !"int", metadata !3, i64 0}
>> +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
>> +!4 = metadata !{metadata !"Simple C/C++ TBAA"}
>
> Can you remove unnecessary metadata?
>
>
>> +
>> +; CHECK: @foo
>> +; CHECK: [[LOAD:%[a-zA-Z0-9_]+]] = load <4 x i32>*
>> +; CHECK: [[REVERSE_LOAD:%[a-zA-Z0-9_]+reverse]] = shufflevector <4 x
>> i32> [[LOAD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
>> -- 1.7.8.3
>
>


-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Support-for-generating-vectors-for-loads-with-1-stri.patch
Type: application/octet-stream
Size: 5572 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140319/e61f6a79/attachment.obj>


More information about the llvm-commits mailing list