[polly] r204257 - Support for generating vectors for loads with -1 stride

Tobias Grosser tobias at grosser.es
Wed Mar 19 12:27:24 PDT 2014


Author: grosser
Date: Wed Mar 19 14:27:24 2014
New Revision: 204257

URL: http://llvm.org/viewvc/llvm-project?rev=204257&view=rev
Log:
Support for generating vectors for loads with -1 stride

This patch enables vectorization of loops containing backward array
traversal (array stride is -1).

Contributed-by: Chris Jenneisch <chrisj at codeaurora.org>

Added:
    polly/trunk/test/Isl/CodeGen/simple_vec_stride_negative_one.ll
Modified:
    polly/trunk/include/polly/CodeGen/BlockGenerators.h
    polly/trunk/lib/CodeGen/BlockGenerators.cpp

Modified: polly/trunk/include/polly/CodeGen/BlockGenerators.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/BlockGenerators.h?rev=204257&r1=204256&r2=204257&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/BlockGenerators.h (original)
+++ polly/trunk/include/polly/CodeGen/BlockGenerators.h Wed Mar 19 14:27:24 2014
@@ -265,7 +265,15 @@ private:
   /// %vector_ptr= bitcast double* %p to <4 x double>*
   /// %vec_full = load <4 x double>* %vector_ptr
   ///
-  Value *generateStrideOneLoad(const LoadInst *Load, ValueMapT &BBMap);
+  /// @param NegativeStride This is used to indicate a -1 stride. In such
+  ///                       a case we load the vector starting at the address
+  ///                       accessed by the last vector lane and shuffle the
+  ///                       loaded elements into reverse order. By default
+  ///                       only positive strides are handled.
+  ///
+  Value *generateStrideOneLoad(const LoadInst *Load,
+                               VectorValueMapT &ScalarMaps,
+                               bool NegativeStride);
 
   /// @brief Load a vector initialized from a single scalar in memory
   ///

Modified: polly/trunk/lib/CodeGen/BlockGenerators.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/BlockGenerators.cpp?rev=204257&r1=204256&r2=204257&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/BlockGenerators.cpp (original)
+++ polly/trunk/lib/CodeGen/BlockGenerators.cpp Wed Mar 19 14:27:24 2014
@@ -439,12 +439,18 @@ Type *VectorBlockGenerator::getVectorPtr
   return PointerType::getUnqual(VectorType);
 }
 
-Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
-                                                   ValueMapT &BBMap) {
+Value *
+VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
+                                            VectorValueMapT &ScalarMaps,
+                                            bool NegativeStride = false) {
+  unsigned VectorWidth = getVectorWidth();
   const Value *Pointer = Load->getPointerOperand();
-  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
-  Value *NewPointer =
-      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
+  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
+  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
+
+  Value *NewPointer = NULL;
+  NewPointer = getNewValue(Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
+                           VLTS[Offset], getLoopForInst(Load));
   Value *VectorPtr =
       Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
   LoadInst *VecLoad =
@@ -452,6 +458,16 @@ Value *VectorBlockGenerator::generateStr
   if (!Aligned)
     VecLoad->setAlignment(8);
 
+  if (NegativeStride) {
+    SmallVector<Constant *, 16> Indices;
+    for (int i = VectorWidth - 1; i >= 0; i--)
+      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
+    Constant *SV = llvm::ConstantVector::get(Indices);
+    Value *RevVecLoad = Builder.CreateShuffleVector(
+        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
+    return RevVecLoad;
+  }
+
   return VecLoad;
 }
 
@@ -516,7 +532,9 @@ void VectorBlockGenerator::generateLoad(
   if (Access.isStrideZero(isl_map_copy(Schedule)))
     NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
   else if (Access.isStrideOne(isl_map_copy(Schedule)))
-    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
+    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
+  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
+    NewLoad = generateStrideOneLoad(Load, ScalarMaps, true);
   else
     NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);
 

Added: polly/trunk/test/Isl/CodeGen/simple_vec_stride_negative_one.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_stride_negative_one.ll?rev=204257&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_stride_negative_one.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_stride_negative_one.ll Wed Mar 19 14:27:24 2014
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -polly-codegen-isl -polly-codegen-scev %vector-opt -S < %s | FileCheck %s
+
+; ModuleID = 'reverse.c'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;int A[100];
+;void foo() {
+;  for (int i=3; i >= 0; i--)
+;    A[i]+=1;
+;}
+
+
+@A = common global [100 x i32] zeroinitializer, align 16
+
+; Function Attrs: nounwind uwtable
+define void @foo() #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 3, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [100 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %arrayidx, align 4
+  %indvars.iv.next = add nsw i64 %indvars.iv, -1
+  %1 = trunc i64 %indvars.iv to i32
+  %cmp = icmp sgt i32 %1, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; CHECK: @foo
+; CHECK: [[LOAD:%[a-zA-Z0-9_]+]] = load <4 x i32>*
+; CHECK: [[REVERSE_LOAD:%[a-zA-Z0-9_]+reverse]] = shufflevector <4 x i32> [[LOAD]], <4 x i32> [[LOAD]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>





More information about the llvm-commits mailing list