[llvm-commits] [polly] r170420 - in /polly/trunk: include/polly/CodeGen/ lib/CodeGen/ test/Isl/CodeGen/

Sebastian Pop spop at codeaurora.org
Mon Dec 17 23:46:14 PST 2012


Author: spop
Date: Tue Dec 18 01:46:13 2012
New Revision: 170420

URL: http://llvm.org/viewvc/llvm-project?rev=170420&view=rev
Log:
isl: vector code generation based on ISL ast

Original patch by Tobias Grosser, slightly modified by Sebastian Pop.

Added:
    polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll
    polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll
    polly/trunk/test/Isl/CodeGen/simple_vec_call.ll
    polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll
    polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll
    polly/trunk/test/Isl/CodeGen/simple_vec_const.ll
    polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll
    polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll
    polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll
    polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll
Modified:
    polly/trunk/include/polly/CodeGen/CodeGeneration.h
    polly/trunk/include/polly/CodeGen/IslAst.h
    polly/trunk/lib/CodeGen/CodeGeneration.cpp
    polly/trunk/lib/CodeGen/IslAst.cpp
    polly/trunk/lib/CodeGen/IslCodeGeneration.cpp

Modified: polly/trunk/include/polly/CodeGen/CodeGeneration.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/CodeGeneration.h?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/CodeGeneration.h (original)
+++ polly/trunk/include/polly/CodeGen/CodeGeneration.h Tue Dec 18 01:46:13 2012
@@ -14,6 +14,9 @@
 
 #include "polly/Config/config.h"
 
+#include "isl/set.h"
+#include "isl/map.h"
+
 namespace polly {
   enum VectorizerChoice {
     VECTORIZER_NONE,
@@ -23,6 +26,46 @@
     VECTORIZER_BB
   };
   extern VectorizerChoice PollyVectorizerChoice;
+
+  static inline int getNumberOfIterations(__isl_take isl_set *Domain) {
+    int Dim = isl_set_dim(Domain, isl_dim_set);
+
+    // Calculate a map similar to the identity map, but with the last input
+    // and output dimension not related.
+    //  [i0, i1, i2, i3] -> [i0, i1, i2, o0]
+    isl_space *Space = isl_set_get_space(Domain);
+    Space = isl_space_drop_outputs(Space, Dim - 2, 1);
+    Space = isl_space_map_from_set(Space);
+    isl_map *Identity = isl_map_identity(Space);
+    Identity = isl_map_add_dims(Identity, isl_dim_in, 1);
+    Identity = isl_map_add_dims(Identity, isl_dim_out, 1);
+
+    isl_map *Map = isl_map_from_domain_and_range(isl_set_copy(Domain), Domain);
+    Map = isl_map_intersect(Map, Identity);
+
+    isl_map *LexMax = isl_map_lexmax(isl_map_copy(Map));
+    isl_map *LexMin = isl_map_lexmin(Map);
+    isl_map *Sub = isl_map_sum(LexMax, isl_map_neg(LexMin));
+
+    isl_set *Elements = isl_map_range(Sub);
+
+    if (!isl_set_is_singleton(Elements)) {
+      isl_set_free(Elements);
+      return -1;
+    }
+
+    isl_point *P = isl_set_sample_point(Elements);
+
+    isl_int V;
+    isl_int_init(V);
+    isl_point_get_coordinate(P, isl_dim_set, Dim - 1, &V);
+    int NumberIterations = isl_int_get_si(V);
+    isl_int_clear(V);
+    isl_point_free(P);
+
+    return NumberIterations;
+  }
+
 }
 
 #endif // POLLY_CODEGENERATION_H

Modified: polly/trunk/include/polly/CodeGen/IslAst.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/IslAst.h?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/IslAst.h (original)
+++ polly/trunk/include/polly/CodeGen/IslAst.h Tue Dec 18 01:46:13 2012
@@ -25,6 +25,8 @@
 #include "polly/Config/config.h"
 #include "polly/ScopPass.h"
 
+#include "isl/ast.h"
+
 struct clast_name;
 namespace llvm {
   class raw_ostream;
@@ -38,9 +40,15 @@
   class Scop;
   class IslAst;
 
+  // Information about an ast node.
   struct IslAstUser {
     struct isl_ast_build *Context;
     struct isl_pw_multi_aff *PMA;
+    // The node is the outermost parallel loop.
+    int IsOutermostParallel;
+
+    // The node is the innermost parallel loop.
+    int IsInnermostParallel;
   };
 
   class IslAstInfo: public ScopPass {
@@ -61,6 +69,34 @@
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
     virtual void releaseMemory();
   };
+
+  // Returns true when Node has been tagged as an innermost parallel loop.
+  static inline bool isInnermostParallel(__isl_keep isl_ast_node *Node) {
+    isl_id *Id = isl_ast_node_get_annotation(Node);
+    if (!Id)
+      return false;
+    struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
+
+    bool Res = false;
+    if (Info)
+      Res = Info->IsInnermostParallel;
+    isl_id_free(Id);
+    return Res;
+  }
+
+  // Returns true when Node has been tagged as an outermost parallel loop.
+  static inline bool isOutermostParallel(__isl_keep isl_ast_node *Node) {
+    isl_id *Id = isl_ast_node_get_annotation(Node);
+    if (!Id)
+      return false;
+    struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
+
+    bool Res = false;
+    if (Info)
+      Res = Info->IsOutermostParallel;
+    isl_id_free(Id);
+    return Res;
+  }
 }
 
 namespace llvm {

Modified: polly/trunk/lib/CodeGen/CodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp Tue Dec 18 01:46:13 2012
@@ -768,45 +768,9 @@
   return true;
 }
 
-int ClastStmtCodeGen::getNumberOfIterations(const clast_for *f) {
-  isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain));
-  isl_set *tmp = isl_set_copy(loopDomain);
-
-  // Calculate a map similar to the identity map, but with the last input
-  // and output dimension not related.
-  //  [i0, i1, i2, i3] -> [i0, i1, i2, o0]
-  isl_space *Space = isl_set_get_space(loopDomain);
-  Space = isl_space_drop_outputs(Space,
-                                 isl_set_dim(loopDomain, isl_dim_set) - 2, 1);
-  Space = isl_space_map_from_set(Space);
-  isl_map *identity = isl_map_identity(Space);
-  identity = isl_map_add_dims(identity, isl_dim_in, 1);
-  identity = isl_map_add_dims(identity, isl_dim_out, 1);
-
-  isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain);
-  map = isl_map_intersect(map, identity);
-
-  isl_map *lexmax = isl_map_lexmax(isl_map_copy(map));
-  isl_map *lexmin = isl_map_lexmin(map);
-  isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin));
-
-  isl_set *elements = isl_map_range(sub);
-
-  if (!isl_set_is_singleton(elements)) {
-    isl_set_free(elements);
-    return -1;
-  }
-
-  isl_point *p = isl_set_sample_point(elements);
-
-  isl_int v;
-  isl_int_init(v);
-  isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v);
-  int numberIterations = isl_int_get_si(v);
-  isl_int_clear(v);
-  isl_point_free(p);
-
-  return (numberIterations) / isl_int_get_si(f->stride) + 1;
+int ClastStmtCodeGen::getNumberOfIterations(const clast_for *For) {
+  isl_set *LoopDomain = isl_set_copy(isl_set_from_cloog_domain(For->domain));
+  return polly::getNumberOfIterations(LoopDomain) / isl_int_get_si(For->stride) + 1;
 }
 
 void ClastStmtCodeGen::codegenForVector(const clast_for *F) {

Modified: polly/trunk/lib/CodeGen/IslAst.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslAst.cpp?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslAst.cpp (original)
+++ polly/trunk/lib/CodeGen/IslAst.cpp Tue Dec 18 01:46:13 2012
@@ -19,6 +19,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "polly/CodeGen/CodeGeneration.h"
 #include "polly/CodeGen/IslAst.h"
 
 #include "polly/LinkAllPasses.h"
@@ -31,7 +32,6 @@
 
 #include "isl/union_map.h"
 #include "isl/list.h"
-#include "isl/ast.h"
 #include "isl/ast_build.h"
 #include "isl/set.h"
 #include "isl/map.h"
@@ -68,24 +68,6 @@
 };
 } // End namespace polly.
 
-
-static void IslAstUserFree(void *User)
-{
-  struct IslAstUser *UserStruct = (struct IslAstUser *) User;
-  isl_ast_build_free(UserStruct->Context);
-  isl_pw_multi_aff_free(UserStruct->PMA);
-  free(UserStruct);
-}
-
-// Information about an ast node.
-struct AstNodeUserInfo {
-  // The node is the outermost parallel loop.
-  int IsOutermostParallel;
-
-  // The node is the innermost parallel loop.
-  int IsInnermostParallel;
-};
-
 // Temporary information used when building the ast.
 struct AstBuildUserInfo {
   // The dependence information.
@@ -99,7 +81,7 @@
 static __isl_give isl_printer *
 printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
                  __isl_take isl_ast_print_options *PrintOptions,
-                 AstNodeUserInfo *Info) {
+                 IslAstUser *Info) {
   if (Info) {
     if (Info->IsInnermostParallel) {
       Printer = isl_printer_start_line(Printer);
@@ -124,26 +106,29 @@
   if (!Id)
     return isl_ast_node_for_print(Node, Printer, PrintOptions);
 
-  struct AstNodeUserInfo *Info = (struct AstNodeUserInfo *) isl_id_get_user(Id);
+  struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
   Printer = printParallelFor(Node, Printer, PrintOptions, Info);
   isl_id_free(Id);
   return Printer;
 }
 
 // Allocate an AstNodeInfo structure and initialize it with default values.
-static struct AstNodeUserInfo *allocateAstNodeUserInfo() {
-  struct AstNodeUserInfo *NodeInfo;
-  NodeInfo = (struct AstNodeUserInfo *) malloc(sizeof(struct AstNodeUserInfo));
+static struct IslAstUser *allocateIslAstUser() {
+  struct IslAstUser *NodeInfo;
+  NodeInfo = (struct IslAstUser *) malloc(sizeof(struct IslAstUser));
+  NodeInfo->PMA = 0;
+  NodeInfo->Context = 0;
   NodeInfo->IsOutermostParallel = 0;
   NodeInfo->IsInnermostParallel = 0;
   return NodeInfo;
 }
 
 // Free the AstNodeInfo structure.
-static void freeAstNodeUserInfo(void *Ptr) {
-  struct AstNodeUserInfo *Info;
-  Info = (struct AstNodeUserInfo *) Ptr;
-  free(Info);
+static void freeIslAstUser(void *Ptr) {
+  struct IslAstUser *UserStruct = (struct IslAstUser *) Ptr;
+  isl_ast_build_free(UserStruct->Context);
+  isl_pw_multi_aff_free(UserStruct->PMA);
+  free(UserStruct);
 }
 
 // Check if the current scheduling dimension is parallel.
@@ -200,7 +185,7 @@
 // Mark a for node openmp parallel, if it is the outermost parallel for node.
 static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
                                struct AstBuildUserInfo *BuildInfo,
-                               struct AstNodeUserInfo *NodeInfo) {
+                               struct IslAstUser *NodeInfo) {
   if (BuildInfo->InParallelFor)
     return;
 
@@ -219,14 +204,10 @@
 //
 static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
                                             void *User) {
-  isl_id *Id;
-  struct AstBuildUserInfo *BuildInfo;
-  struct AstNodeUserInfo *NodeInfo;
-
-  BuildInfo = (struct AstBuildUserInfo *) User;
-  NodeInfo = allocateAstNodeUserInfo();
-  Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo);
-  Id = isl_id_set_free_user(Id, freeAstNodeUserInfo);
+  struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User;
+  struct IslAstUser *NodeInfo = allocateIslAstUser();
+  isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo);
+  Id = isl_id_set_free_user(Id, freeIslAstUser);
 
   markOpenmpParallel(Build, BuildInfo, NodeInfo);
 
@@ -286,7 +267,7 @@
   isl_id *Id = isl_ast_node_get_annotation(Node);
   if (!Id)
     return Node;
-  struct AstNodeUserInfo *Info = (struct AstNodeUserInfo *) isl_id_get_user(Id);
+  struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
   struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User;
   if (Info) {
     if (Info->IsOutermostParallel)
@@ -296,28 +277,36 @@
         Info->IsInnermostParallel = 1;
   }
 
-  isl_id_free(Id);
+  if (!Info->Context)
+    Info->Context = isl_ast_build_copy(Build);
 
+  isl_id_free(Id);
   return Node;
 }
 
 static __isl_give isl_ast_node *
-AtEachDomain(__isl_keep isl_ast_node *Node,
+AtEachDomain(__isl_take isl_ast_node *Node,
              __isl_keep isl_ast_build *Context, void *User)
 {
-  isl_map *Map;
-  struct IslAstUser *UserStruct;
+  struct IslAstUser *Info = NULL;
+  isl_id *Id = isl_ast_node_get_annotation(Node);
+
+  if (Id)
+    Info = (struct IslAstUser *) isl_id_get_user(Id);
+
+  if (!Info) {
+    // Allocate annotations once: parallel for detection might have already
+    // allocated the annotations for this node.
+    Info = allocateIslAstUser();
+    Id = isl_id_alloc(isl_ast_node_get_ctx(Node), NULL, Info);
+    Id = isl_id_set_free_user(Id, &freeIslAstUser);
+  }
 
-  UserStruct = (struct IslAstUser *) malloc(sizeof(struct IslAstUser));
+  isl_map *Map = isl_map_from_union_map(isl_ast_build_get_schedule(Context));
+  Info->PMA = isl_pw_multi_aff_from_map(isl_map_reverse(Map));
+  Info->Context = isl_ast_build_copy(Context);
 
-  Map = isl_map_from_union_map(isl_ast_build_get_schedule(Context));
-  UserStruct->PMA = isl_pw_multi_aff_from_map(isl_map_reverse(Map));
-  UserStruct->Context = isl_ast_build_copy(Context);
-
-  isl_id *Annotation = isl_id_alloc(isl_ast_node_get_ctx(Node), NULL,
-                                    UserStruct);
-  Annotation = isl_id_set_free_user(Annotation, &IslAstUserFree);
-  return isl_ast_node_set_annotation(Node, Annotation);
+  return isl_ast_node_set_annotation(Node, Id);
 }
 
 IslAst::IslAst(Scop *Scop, Dependences &D) : S(Scop) {
@@ -343,7 +332,7 @@
     isl_union_map_dump(Schedule);
   );
 
-  if (DetectParallel) {
+  if (DetectParallel || PollyVectorizerChoice != VECTORIZER_NONE) {
     BuildInfo.Deps = &D;
     BuildInfo.InParallelFor = 0;
 

Modified: polly/trunk/lib/CodeGen/IslCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslCodeGeneration.cpp?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/IslCodeGeneration.cpp Tue Dec 18 01:46:13 2012
@@ -26,6 +26,7 @@
 #include "polly/TempScopInfo.h"
 #include "polly/CodeGen/IslAst.h"
 #include "polly/CodeGen/BlockGenerators.h"
+#include "polly/CodeGen/CodeGeneration.h"
 #include "polly/CodeGen/LoopGenerators.h"
 #include "polly/CodeGen/Utils.h"
 #include "polly/Support/GICHelper.h"
@@ -579,8 +580,23 @@
   __isl_give isl_ast_expr *getUpperBound(__isl_keep isl_ast_node *For,
                                          CmpInst::Predicate &Predicate);
 
+  unsigned getNumberOfIterations(__isl_keep isl_ast_node *For);
+
   void createFor(__isl_take isl_ast_node *For);
+  void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
+  void createForSequential(__isl_take isl_ast_node *For);
+  void createSubstitutions(__isl_take isl_pw_multi_aff *PMA,
+                           __isl_take isl_ast_build *Context,
+                           ScopStmt *Stmt, ValueMapT &VMap);
+  void createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA,
+                                 __isl_take isl_ast_build *Context,
+                                 ScopStmt *Stmt, VectorValueMapT &VMap,
+                                 std::vector<Value*> &IVS,
+                                 __isl_take isl_id *IteratorID);
   void createIf(__isl_take isl_ast_node *If);
+  void createUserVector(__isl_take isl_ast_node *User,
+                        std::vector<Value*> &IVS, __isl_take isl_id *IteratorID,
+                        __isl_take isl_union_map *Schedule);
   void createUser(__isl_take isl_ast_node *User);
   void createBlock(__isl_take isl_ast_node *Block);
 };
@@ -635,7 +651,128 @@
   return UB;
 }
 
-void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
+unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
+  isl_id *Annotation = isl_ast_node_get_annotation(For);
+  if (!Annotation)
+    return -1;
+
+  struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation);
+  if (!Info) {
+    isl_id_free(Annotation);
+    return -1;
+  }
+
+  isl_union_map *Schedule = isl_ast_build_get_schedule(Info->Context);
+  isl_set *LoopDomain = isl_set_from_union_set(isl_union_map_range(Schedule));
+  isl_id_free(Annotation);
+  return polly::getNumberOfIterations(LoopDomain) + 1;
+}
+
+void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
+                                      std::vector<Value*> &IVS,
+                                      __isl_take isl_id *IteratorID,
+                                      __isl_take isl_union_map *Schedule) {
+  isl_id *Annotation = isl_ast_node_get_annotation(User);
+  assert(Annotation && "Vector user statement is not annotated");
+
+  struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation);
+  assert(Info && "Vector user statement annotation does not contain info");
+
+  isl_id *Id = isl_pw_multi_aff_get_tuple_id(Info->PMA, isl_dim_out);
+  ScopStmt *Stmt = (ScopStmt *) isl_id_get_user(Id);
+  VectorValueMapT VectorMap(IVS.size());
+
+  isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain());
+  Schedule = isl_union_map_intersect_domain(Schedule, Domain);
+  isl_map *S = isl_map_from_union_map(Schedule);
+
+  createSubstitutionsVector(isl_pw_multi_aff_copy(Info->PMA),
+                            isl_ast_build_copy(Info->Context),
+                            Stmt, VectorMap, IVS, IteratorID);
+  VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, S, P);
+
+
+  isl_map_free(S);
+  isl_id_free(Annotation);
+  isl_id_free(Id);
+  isl_ast_node_free(User);
+}
+
+void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
+                                     int VectorWidth) {
+  isl_ast_node *Body = isl_ast_node_for_get_body(For);
+  isl_ast_expr *Init = isl_ast_node_for_get_init(For);
+  isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
+  isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
+  isl_id *IteratorID = isl_ast_expr_get_id(Iterator);
+  CmpInst::Predicate Predicate;
+  isl_ast_expr *UB = getUpperBound(For, Predicate);
+
+  Value *ValueLB = ExprBuilder.create(Init);
+  Value *ValueUB = ExprBuilder.create(UB);
+  Value *ValueInc = ExprBuilder.create(Inc);
+
+  Type *MaxType = ExprBuilder.getType(Iterator);
+  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
+  MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
+  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
+
+  if (MaxType != ValueLB->getType())
+    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
+  if (MaxType != ValueUB->getType())
+    ValueUB = Builder.CreateSExt(ValueUB, MaxType);
+  if (MaxType != ValueInc->getType())
+    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
+
+  std::vector<Value*> IVS(VectorWidth);
+  IVS[0] = ValueLB;
+
+  for (int i = 1; i < VectorWidth; i++)
+    IVS[i] = Builder.CreateAdd(IVS[i-1], ValueInc, "p_vector_iv");
+
+  isl_id *Annotation = isl_ast_node_get_annotation(For);
+  assert(Annotation && "For statement is not annotated");
+
+  struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation);
+  assert(Info && "For statement annotation does not contain info");
+
+  isl_union_map *Schedule = isl_ast_build_get_schedule(Info->Context);
+  assert(Schedule && "For statement annotation does not contain its schedule");
+
+  IDToValue[IteratorID] = ValueLB;
+
+  switch (isl_ast_node_get_type(Body)) {
+  case isl_ast_node_user:
+    createUserVector(Body, IVS, isl_id_copy(IteratorID),
+                     isl_union_map_copy(Schedule));
+    break;
+  case isl_ast_node_block: {
+    isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
+
+    for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i)
+      createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS,
+                       isl_id_copy(IteratorID),
+                       isl_union_map_copy(Schedule));
+
+    isl_ast_node_free(Body);
+    isl_ast_node_list_free(List);
+    break;
+  }
+  default:
+    isl_ast_node_dump(Body);
+    llvm_unreachable("Unhandled isl_ast_node in vectorizer");
+  }
+
+  IDToValue.erase(IteratorID);
+  isl_id_free(IteratorID);
+  isl_id_free(Annotation);
+  isl_union_map_free(Schedule);
+
+  isl_ast_node_free(For);
+  isl_ast_expr_free(Iterator);
+}
+
+void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {
   isl_ast_node *Body;
   isl_ast_expr *Init, *Inc, *Iterator, *UB;
   isl_id *IteratorID;
@@ -696,6 +833,19 @@
   isl_id_free(IteratorID);
 }
 
+void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
+  bool Vector = PollyVectorizerChoice != VECTORIZER_NONE;
+
+  if (Vector && isInnermostParallel(For)) {
+    int VectorWidth = getNumberOfIterations(For);
+    if (1 < VectorWidth && VectorWidth <= 16) {
+      createForVector(For, VectorWidth);
+      return;
+    }
+  }
+  createForSequential(For);
+}
+
 void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
   isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);
 
@@ -738,26 +888,18 @@
   isl_ast_node_free(If);
 }
 
-void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
-  ValueMapT VMap;
-  struct IslAstUser *UserInfo;
-  isl_id *Annotation, *Id;
-  ScopStmt *Stmt;
-
-  Annotation = isl_ast_node_get_annotation(User);
-  UserInfo = (struct IslAstUser *) isl_id_get_user(Annotation);
-  Id = isl_pw_multi_aff_get_tuple_id(UserInfo->PMA, isl_dim_out);
-  Stmt = (ScopStmt *) isl_id_get_user(Id);
-
-  for (unsigned i = 0; i < isl_pw_multi_aff_dim(UserInfo->PMA, isl_dim_out);
+void IslNodeBuilder::createSubstitutions(__isl_take isl_pw_multi_aff *PMA,
+                         __isl_take isl_ast_build *Context,
+                         ScopStmt *Stmt, ValueMapT &VMap) {
+  for (unsigned i = 0; i < isl_pw_multi_aff_dim(PMA, isl_dim_out);
        ++i) {
     isl_pw_aff *Aff;
     isl_ast_expr *Expr;
     const Value *OldIV;
     Value *V;
 
-    Aff = isl_pw_multi_aff_get_pw_aff(UserInfo->PMA, i);
-    Expr = isl_ast_build_expr_from_pw_aff(UserInfo->Context, Aff);
+    Aff = isl_pw_multi_aff_get_pw_aff(PMA, i);
+    Expr = isl_ast_build_expr_from_pw_aff(Context, Aff);
     OldIV = Stmt->getInductionVariableForDimension(i);
     V = ExprBuilder.create(Expr);
 
@@ -768,6 +910,48 @@
     VMap[OldIV] = V;
   }
 
+  isl_pw_multi_aff_free(PMA);
+  isl_ast_build_free(Context);
+}
+
+void IslNodeBuilder::createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA,
+  __isl_take isl_ast_build *Context, ScopStmt *Stmt, VectorValueMapT &VMap,
+  std::vector<Value*> &IVS, __isl_take isl_id *IteratorID) {
+  int i = 0;
+
+  Value *OldValue = IDToValue[IteratorID];
+  for (std::vector<Value*>::iterator II = IVS.begin(), IE = IVS.end();
+      II != IE; ++II) {
+    IDToValue[IteratorID] = *II;
+    createSubstitutions(isl_pw_multi_aff_copy(PMA),
+                        isl_ast_build_copy(Context), Stmt, VMap[i]);
+    i++;
+  }
+
+  IDToValue[IteratorID] = OldValue;
+  isl_id_free(IteratorID);
+  isl_pw_multi_aff_free(PMA);
+  isl_ast_build_free(Context);
+}
+
+void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
+  ValueMapT VMap;
+  struct IslAstUser *Info;
+  isl_id *Annotation, *Id;
+  ScopStmt *Stmt;
+
+  Annotation = isl_ast_node_get_annotation(User);
+  assert(Annotation && "Scalar user statement is not annotated");
+
+  Info = (struct IslAstUser *) isl_id_get_user(Annotation);
+  assert(Info && "Scalar user statement annotation does not contain info");
+
+  Id = isl_pw_multi_aff_get_tuple_id(Info->PMA, isl_dim_out);
+  Stmt = (ScopStmt *) isl_id_get_user(Id);
+
+  createSubstitutions(isl_pw_multi_aff_copy(Info->PMA),
+                      isl_ast_build_copy(Info->Context), Stmt, VMap);
+
   BlockGenerator::generate(Builder, *Stmt, VMap, P);
 
   isl_ast_node_free(User);

Added: polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,65 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+
+;#define N 1024
+;float A[N];
+;float B[N];
+;
+;void simple_vec_const(void) {
+;  int i;
+;
+;  for (i = 0; i < 4; i++)
+;    B[i] = A[i] + 1;
+;}
+;int main()
+;{
+;  simple_vec_const();
+;  return A[42];
+;}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_const() nounwind {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb5, %bb
+  %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ]
+  %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+  %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+  %exitcond = icmp ne i64 %indvar, 4
+  br i1 %exitcond, label %bb3, label %bb6
+
+bb3:                                              ; preds = %bb2
+  %tmp = load float* %scevgep1, align 4
+  %tmp4 = fadd float %tmp, 1.000000e+00
+  store float %tmp4, float* %scevgep, align 4
+  br label %bb5
+
+bb5:                                              ; preds = %bb3
+  %indvar.next = add i64 %indvar, 1
+  br label %bb2
+
+bb6:                                              ; preds = %bb2
+  ret void
+}
+
+define i32 @main() nounwind {
+bb:
+  call void @simple_vec_const()
+  %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+  %tmp1 = fptosi float %tmp to i32
+  ret i32 %tmp1
+}
+
+; CHECK: %p_scevgep1.moved.to.bb3 = getelementptr [1024 x float]* @A, i64 0, i64 0
+; CHECK: %p_scevgep.moved.to.bb3 = getelementptr [1024 x float]* @B, i64 0, i64 0
+; CHECK: %vector_ptr = bitcast float* %p_scevgep1.moved.to.bb3 to <4 x float>*
+; CHECK: %tmp_p_vec_full = load <4 x float>* %vector_ptr, align 8
+; CHECK: %tmp4p_vec = fadd <4 x float> %tmp_p_vec_full, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK: %vector_ptr7 = bitcast float* %p_scevgep.moved.to.bb3 to <4 x float>*
+; CHECK: store <4 x float> %tmp4p_vec, <4 x float>* %vector_ptr7, align 8
+

Added: polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,66 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+
+;#define N 1024
+;float A[N];
+;float B[N];
+;
+;void simple_vec_const(void) {
+;  int i;
+;
+;  for (i = 0; i < 4; i++)
+;    B[i] = A[i] + i;
+;}
+;int main()
+;{
+;  simple_vec_const();
+;  return A[42];
+;}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_const() nounwind {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb6, %bb
+  %indvar = phi i64 [ %indvar.next, %bb6 ], [ 0, %bb ]
+  %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+  %i.0 = trunc i64 %indvar to i32
+  %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+  %exitcond = icmp ne i64 %indvar, 4
+  br i1 %exitcond, label %bb3, label %bb7
+
+bb3:                                              ; preds = %bb2
+  %tmp = load float* %scevgep1, align 4
+  %tmp4 = sitofp i32 %i.0 to float
+  %tmp5 = fadd float %tmp, %tmp4
+  store float %tmp5, float* %scevgep, align 4
+  br label %bb6
+
+bb6:                                              ; preds = %bb3
+  %indvar.next = add i64 %indvar, 1
+  br label %bb2
+
+bb7:                                              ; preds = %bb2
+  ret void
+}
+
+define i32 @main() nounwind {
+bb:
+  call void @simple_vec_const()
+  %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+  %tmp1 = fptosi float %tmp to i32
+  ret i32 %tmp1
+}
+
+
+; CHECK: insertelement <4 x float> undef, float %{{[^,]+}}, i32 0
+; CHECK: insertelement <4 x float> %0, float %{{[^,]+}}, i32 1
+; CHECK: insertelement <4 x float> %1, float %{{[^,]+}}, i32 2
+; CHECK: insertelement <4 x float> %2, float %{{[^,]+}}, i32 3
+; CHECK: fadd <4 x float> %tmp_p_vec_full, %3
+

Added: polly/trunk/test/Isl/CodeGen/simple_vec_call.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_call.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_call.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_call.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,42 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+declare float @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+  br label %body
+
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+  %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+  %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  %result = tail call float @foo(float %value) nounwind
+  store float %result, float* %scevgep, align 4
+  %indvar_next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar_next, 4
+  br i1 %exitcond, label %return, label %body
+
+return:
+  ret void
+}
+
+; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %0) nounwind
+; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %1) nounwind
+; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %2) nounwind
+; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %3) nounwind
+; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0
+; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1
+; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2
+; CHECK: %7 = insertelement <4 x float> %6, float [[RES4]], i32 3
+; CHECK:  store <4 x float> %7

Added: polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,61 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -polly-codegen-scev=false -dce -S < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -polly-codegen-scev=true -dce -S < %s | FileCheck %s -check-prefix=CHECK-SCEV
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float**] zeroinitializer, align 16
+
+declare float** @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+  br label %body
+
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+  %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
+  %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  %result = tail call float** @foo(float %value) nounwind
+  store float** %result, float*** %scevgep, align 4
+  %indvar_next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar_next, 4
+  br i1 %exitcond, label %return, label %body
+
+return:
+  ret void
+}
+
+; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
+; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) nounwind
+; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) nounwind
+; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) nounwind
+; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) nounwind
+; CHECK: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0
+; CHECK: %5 = insertelement <4 x float**> %4, float** %p_result4, i32 1
+; CHECK: %6 = insertelement <4 x float**> %5, float** %p_result5, i32 2
+; CHECK: %7 = insertelement <4 x float**> %6, float** %p_result6, i32 3
+; CHECK: %vector_ptr = bitcast float*** %p_scevgep to <4 x float**>*
+; CHECK: store <4 x float**> %7, <4 x float**>* %vector_ptr, align 8
+
+; CHECK-SCEV: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK-SCEV: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK-SCEV: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK-SCEV: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK-SCEV: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK-SCEV: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK-SCEV: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) nounwind
+; CHECK-SCEV: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) nounwind
+; CHECK-SCEV: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) nounwind
+; CHECK-SCEV: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) nounwind
+; CHECK-SCEV: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0
+; CHECK-SCEV: %5 = insertelement <4 x float**> %4, float** %p_result1, i32 1
+; CHECK-SCEV: %6 = insertelement <4 x float**> %5, float** %p_result2, i32 2
+; CHECK-SCEV: %7 = insertelement <4 x float**> %6, float** %p_result3, i32 3
+; CHECK-SCEV: store <4 x float**> %7, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align

Added: polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x double] zeroinitializer, align 16
+
+define void @simple_vec_const() nounwind {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb3, %bb
+  %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ]
+  %scevgep = getelementptr [1024 x double]* @B, i64 0, i64 %indvar
+  %exitcond = icmp ne i64 %indvar, 4
+  br i1 %exitcond, label %bb2, label %bb4
+
+bb2:                                              ; preds = %bb1
+  %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  %tmp2 = fpext float %tmp to double
+  store double %tmp2, double* %scevgep, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb2
+  %indvar.next = add i64 %indvar, 1
+  br label %bb1
+
+bb4:                                              ; preds = %bb1
+  ret void
+}
+
+; CHECK: %p_scevgep.moved.to.bb2 = getelementptr [1024 x double]* @B, i64 0, i64 0
+; CHECK: %tmp_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %tmp_p_splat = shufflevector <1 x float> %tmp_p_splat_one, <1 x float> %tmp_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = fpext <4 x float> %tmp_p_splat to <4 x double>
+; CHECK: %vector_ptr = bitcast double* %p_scevgep.moved.to.bb2 to <4 x double>*
+; CHECK: store <4 x double> %0, <4 x double>* %vector_ptr, align 8
+

Added: polly/trunk/test/Isl/CodeGen/simple_vec_const.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_const.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_const.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_const.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,57 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -S < %s | FileCheck %s
+
+;#define N 1024
+;float A[N];
+;float B[N];
+;
+;void simple_vec_const(void) {
+;  int i;
+;
+;  for (i = 0; i < 4; i++)
+;    B[i] = A[0];
+;}
+;int main()
+;{
+;  simple_vec_const();
+;  return A[42];
+;}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_const() nounwind {
+; <label>:0
+  br label %1
+
+; <label>:1                                       ; preds = %4, %0
+  %indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ]
+  %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+  %exitcond = icmp ne i64 %indvar, 4
+  br i1 %exitcond, label %2, label %5
+
+; <label>:2                                       ; preds = %1
+  %3 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  store float %3, float* %scevgep, align 4
+  br label %4
+
+; <label>:4                                       ; preds = %2
+  %indvar.next = add i64 %indvar, 1
+  br label %1
+
+; <label>:5                                       ; preds = %1
+  ret void
+}
+
+define i32 @main() nounwind {
+  call void @simple_vec_const()
+  %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+  %2 = fptosi float %1 to i32
+  ret i32 %2
+}
+
+
+; CHECK: load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*)
+; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer

Added: polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,40 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_large_width() nounwind {
+; <label>:0
+  br label %1
+
+; <label>:1                                       ; preds = %4, %0
+  %indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ]
+  %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+  %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+  %exitcond = icmp ne i64 %indvar, 15
+  br i1 %exitcond, label %2, label %5
+
+; <label>:2                                       ; preds = %1
+  %3 = load float* %scevgep1, align 4
+  store float %3, float* %scevgep, align 4
+  br label %4
+
+; <label>:4                                       ; preds = %2
+  %indvar.next = add i64 %indvar, 1
+  br label %1
+
+; <label>:5                                       ; preds = %1
+  ret void
+}
+
+define i32 @main() nounwind {
+  call void @simple_vec_large_width()
+  %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+  %2 = fptosi float %1 to i32
+  ret i32 %2
+}
+
+; CHECK: [[VEC1:%[a-zA-Z0-9_]+_full]] = load <15 x float>*
+; CHECK: store <15 x float> [[VEC1]]

Added: polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,40 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -S -polly-codegen-scev=false < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -S -polly-codegen-scev=true < %s | FileCheck %s -check-prefix=CHECK-SCEV
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float**] zeroinitializer, align 16
+@B = common global [1024 x float**] zeroinitializer, align 16
+
+declare float @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+  br label %body
+
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+  %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
+  %value = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0), align 16
+  store float** %value, float*** %scevgep, align 4
+  %indvar_next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar_next, 4
+  br i1 %exitcond, label %return, label %body
+
+return:
+  ret void
+}
+
+; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
+; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
+; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
+; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
+; CHECK: %value_p_splat_one = load <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %vector_ptr = bitcast float*** %p_scevgep to <4 x float**>*
+; CHECK: store <4 x float**> %value_p_splat, <4 x float**>* %vector_ptr
+
+
+; CHECK-SCEV: %value_p_splat_one = load <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8
+; CHECK-SCEV: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK-SCEV: store <4 x float**> %value_p_splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8

Added: polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,73 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt  -dce -S < %s | FileCheck %s
+
+;#define N 1024
+;float A[N];
+;float B[N];
+;
+;void simple_vec_stride_x(void) {
+;  int i;
+;
+;  for (i = 0; i < 4; i++)
+;    B[2 * i] = A[2 * i];
+;}
+;int main()
+;{
+;  simple_vec_stride_x();
+;  return A[42];
+;}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_stride_x() nounwind {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb5, %bb
+  %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ]
+  %tmp = mul i64 %indvar, 2
+  %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %tmp
+  %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %tmp
+  %exitcond = icmp ne i64 %indvar, 4
+  br i1 %exitcond, label %bb3, label %bb6
+
+bb3:                                              ; preds = %bb2
+  %tmp4 = load float* %scevgep1, align 8
+  store float %tmp4, float* %scevgep, align 8
+  br label %bb5
+
+bb5:                                              ; preds = %bb3
+  %indvar.next = add i64 %indvar, 1
+  br label %bb2
+
+bb6:                                              ; preds = %bb2
+  ret void
+}
+
+define i32 @main() nounwind {
+bb:
+  call void @simple_vec_stride_x()
+  %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+  %tmp1 = fptosi float %tmp to i32
+  ret i32 %tmp1
+}
+
+; CHECK: [[LOAD1:%[a-zA-Z0-9_]+_scalar_]] = load float*
+; CHECK: [[VEC1:%[a-zA-Z0-9_]+]] = insertelement <4 x float> undef, float [[LOAD1]], i32 0
+; CHECK: [[LOAD2:%[a-zA-Z0-9_]+]] = load float*
+; CHECK: [[VEC2:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC1]], float [[LOAD2]], i32 1
+; CHECK: [[LOAD3:%[a-zA-Z0-9_]+]] = load float*
+; CHECK: [[VEC3:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC2]], float [[LOAD3]], i32 2
+; CHECK: [[LOAD4:%[a-zA-Z0-9_]+]] = load float*
+; CHECK: [[VEC4:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC3]], float [[LOAD4]], i32 3
+; CHECK: [[EL1:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 0
+; CHECK: store float [[EL1]]
+; CHECK: [[EL2:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 1
+; CHECK: store float [[EL2]]
+; CHECK: [[EL3:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 2
+; CHECK: store float [[EL3]]
+; CHECK: [[EL4:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 3
+; CHECK: store float [[EL4]]

Added: polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,50 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+@C = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_stride_one() nounwind {
+bb0:
+  br label %bb1
+
+bb1:
+  %indvar = phi i64 [ %indvar.next, %bb4 ], [ 0, %bb0 ]
+  %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+  %scevgep2 = getelementptr [1024 x float]* @C, i64 0, i64 %indvar
+  %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+  %exitcond = icmp ne i64 %indvar, 4
+  br i1 %exitcond, label %bb2a, label %bb5
+
+bb2a:
+  %tmp1 = load float* %scevgep1, align 4
+  store float %tmp1, float* %scevgep, align 4
+  br label %bb2b
+
+bb2b:
+  %tmp2 = load float* %scevgep1, align 4
+  store float %tmp2, float* %scevgep2, align 4
+  br label %bb4
+
+bb4:
+  %indvar.next = add i64 %indvar, 1
+  br label %bb1
+
+bb5:
+  ret void
+}
+
+define i32 @main() nounwind {
+  call void @simple_vec_stride_one()
+  %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+  %2 = fptosi float %1 to i32
+  ret i32 %2
+}
+
+; CHECK: [[LOAD1:%[a-zA-Z0-9_]+_full]] = load <4 x float>*
+; CHECK: store <4 x float> [[LOAD1]]
+; CHECK: [[LOAD2:%[a-zA-Z0-9_]+_full]] = load <4 x float>*
+; CHECK: store <4 x float> [[LOAD2]]
+





More information about the llvm-commits mailing list