[llvm-commits] [polly] r170420 - in /polly/trunk: include/polly/CodeGen/ lib/CodeGen/ test/Isl/CodeGen/
Sebastian Pop
spop at codeaurora.org
Mon Dec 17 23:46:14 PST 2012
Author: spop
Date: Tue Dec 18 01:46:13 2012
New Revision: 170420
URL: http://llvm.org/viewvc/llvm-project?rev=170420&view=rev
Log:
isl: vector code generation based on ISL ast
Original patch by Tobias Grosser, slightly modified by Sebastian Pop.
Added:
polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll
polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll
polly/trunk/test/Isl/CodeGen/simple_vec_call.ll
polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll
polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll
polly/trunk/test/Isl/CodeGen/simple_vec_const.ll
polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll
polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll
polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll
polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll
Modified:
polly/trunk/include/polly/CodeGen/CodeGeneration.h
polly/trunk/include/polly/CodeGen/IslAst.h
polly/trunk/lib/CodeGen/CodeGeneration.cpp
polly/trunk/lib/CodeGen/IslAst.cpp
polly/trunk/lib/CodeGen/IslCodeGeneration.cpp
Modified: polly/trunk/include/polly/CodeGen/CodeGeneration.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/CodeGeneration.h?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/CodeGeneration.h (original)
+++ polly/trunk/include/polly/CodeGen/CodeGeneration.h Tue Dec 18 01:46:13 2012
@@ -14,6 +14,9 @@
#include "polly/Config/config.h"
+#include "isl/set.h"
+#include "isl/map.h"
+
namespace polly {
enum VectorizerChoice {
VECTORIZER_NONE,
@@ -23,6 +26,46 @@
VECTORIZER_BB
};
extern VectorizerChoice PollyVectorizerChoice;
+
+ static inline int getNumberOfIterations(__isl_take isl_set *Domain) {
+ int Dim = isl_set_dim(Domain, isl_dim_set);
+
+ // Calculate a map similar to the identity map, but with the last input
+ // and output dimension not related.
+ // [i0, i1, i2, i3] -> [i0, i1, i2, o0]
+ isl_space *Space = isl_set_get_space(Domain);
+ Space = isl_space_drop_outputs(Space, Dim - 2, 1);
+ Space = isl_space_map_from_set(Space);
+ isl_map *Identity = isl_map_identity(Space);
+ Identity = isl_map_add_dims(Identity, isl_dim_in, 1);
+ Identity = isl_map_add_dims(Identity, isl_dim_out, 1);
+
+ isl_map *Map = isl_map_from_domain_and_range(isl_set_copy(Domain), Domain);
+ Map = isl_map_intersect(Map, Identity);
+
+ isl_map *LexMax = isl_map_lexmax(isl_map_copy(Map));
+ isl_map *LexMin = isl_map_lexmin(Map);
+ isl_map *Sub = isl_map_sum(LexMax, isl_map_neg(LexMin));
+
+ isl_set *Elements = isl_map_range(Sub);
+
+ if (!isl_set_is_singleton(Elements)) {
+ isl_set_free(Elements);
+ return -1;
+ }
+
+ isl_point *P = isl_set_sample_point(Elements);
+
+ isl_int V;
+ isl_int_init(V);
+ isl_point_get_coordinate(P, isl_dim_set, Dim - 1, &V);
+ int NumberIterations = isl_int_get_si(V);
+ isl_int_clear(V);
+ isl_point_free(P);
+
+ return NumberIterations;
+ }
+
}
#endif // POLLY_CODEGENERATION_H
Modified: polly/trunk/include/polly/CodeGen/IslAst.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/IslAst.h?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/IslAst.h (original)
+++ polly/trunk/include/polly/CodeGen/IslAst.h Tue Dec 18 01:46:13 2012
@@ -25,6 +25,8 @@
#include "polly/Config/config.h"
#include "polly/ScopPass.h"
+#include "isl/ast.h"
+
struct clast_name;
namespace llvm {
class raw_ostream;
@@ -38,9 +40,15 @@
class Scop;
class IslAst;
+ // Information about an ast node.
struct IslAstUser {
struct isl_ast_build *Context;
struct isl_pw_multi_aff *PMA;
+ // The node is the outermost parallel loop.
+ int IsOutermostParallel;
+
+ // The node is the innermost parallel loop.
+ int IsInnermostParallel;
};
class IslAstInfo: public ScopPass {
@@ -61,6 +69,34 @@
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual void releaseMemory();
};
+
+ // Returns true when Node has been tagged as an innermost parallel loop.
+ static inline bool isInnermostParallel(__isl_keep isl_ast_node *Node) {
+ isl_id *Id = isl_ast_node_get_annotation(Node);
+ if (!Id)
+ return false;
+ struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
+
+ bool Res = false;
+ if (Info)
+ Res = Info->IsInnermostParallel;
+ isl_id_free(Id);
+ return Res;
+ }
+
+ // Returns true when Node has been tagged as an outermost parallel loop.
+ static inline bool isOutermostParallel(__isl_keep isl_ast_node *Node) {
+ isl_id *Id = isl_ast_node_get_annotation(Node);
+ if (!Id)
+ return false;
+ struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
+
+ bool Res = false;
+ if (Info)
+ Res = Info->IsOutermostParallel;
+ isl_id_free(Id);
+ return Res;
+ }
}
namespace llvm {
Modified: polly/trunk/lib/CodeGen/CodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp Tue Dec 18 01:46:13 2012
@@ -768,45 +768,9 @@
return true;
}
-int ClastStmtCodeGen::getNumberOfIterations(const clast_for *f) {
- isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain));
- isl_set *tmp = isl_set_copy(loopDomain);
-
- // Calculate a map similar to the identity map, but with the last input
- // and output dimension not related.
- // [i0, i1, i2, i3] -> [i0, i1, i2, o0]
- isl_space *Space = isl_set_get_space(loopDomain);
- Space = isl_space_drop_outputs(Space,
- isl_set_dim(loopDomain, isl_dim_set) - 2, 1);
- Space = isl_space_map_from_set(Space);
- isl_map *identity = isl_map_identity(Space);
- identity = isl_map_add_dims(identity, isl_dim_in, 1);
- identity = isl_map_add_dims(identity, isl_dim_out, 1);
-
- isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain);
- map = isl_map_intersect(map, identity);
-
- isl_map *lexmax = isl_map_lexmax(isl_map_copy(map));
- isl_map *lexmin = isl_map_lexmin(map);
- isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin));
-
- isl_set *elements = isl_map_range(sub);
-
- if (!isl_set_is_singleton(elements)) {
- isl_set_free(elements);
- return -1;
- }
-
- isl_point *p = isl_set_sample_point(elements);
-
- isl_int v;
- isl_int_init(v);
- isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v);
- int numberIterations = isl_int_get_si(v);
- isl_int_clear(v);
- isl_point_free(p);
-
- return (numberIterations) / isl_int_get_si(f->stride) + 1;
+int ClastStmtCodeGen::getNumberOfIterations(const clast_for *For) {
+ isl_set *LoopDomain = isl_set_copy(isl_set_from_cloog_domain(For->domain));
+ return polly::getNumberOfIterations(LoopDomain) / isl_int_get_si(For->stride) + 1;
}
void ClastStmtCodeGen::codegenForVector(const clast_for *F) {
Modified: polly/trunk/lib/CodeGen/IslAst.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslAst.cpp?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslAst.cpp (original)
+++ polly/trunk/lib/CodeGen/IslAst.cpp Tue Dec 18 01:46:13 2012
@@ -19,6 +19,7 @@
//
//===----------------------------------------------------------------------===//
+#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslAst.h"
#include "polly/LinkAllPasses.h"
@@ -31,7 +32,6 @@
#include "isl/union_map.h"
#include "isl/list.h"
-#include "isl/ast.h"
#include "isl/ast_build.h"
#include "isl/set.h"
#include "isl/map.h"
@@ -68,24 +68,6 @@
};
} // End namespace polly.
-
-static void IslAstUserFree(void *User)
-{
- struct IslAstUser *UserStruct = (struct IslAstUser *) User;
- isl_ast_build_free(UserStruct->Context);
- isl_pw_multi_aff_free(UserStruct->PMA);
- free(UserStruct);
-}
-
-// Information about an ast node.
-struct AstNodeUserInfo {
- // The node is the outermost parallel loop.
- int IsOutermostParallel;
-
- // The node is the innermost parallel loop.
- int IsInnermostParallel;
-};
-
// Temporary information used when building the ast.
struct AstBuildUserInfo {
// The dependence information.
@@ -99,7 +81,7 @@
static __isl_give isl_printer *
printParallelFor(__isl_keep isl_ast_node *Node, __isl_take isl_printer *Printer,
__isl_take isl_ast_print_options *PrintOptions,
- AstNodeUserInfo *Info) {
+ IslAstUser *Info) {
if (Info) {
if (Info->IsInnermostParallel) {
Printer = isl_printer_start_line(Printer);
@@ -124,26 +106,29 @@
if (!Id)
return isl_ast_node_for_print(Node, Printer, PrintOptions);
- struct AstNodeUserInfo *Info = (struct AstNodeUserInfo *) isl_id_get_user(Id);
+ struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
Printer = printParallelFor(Node, Printer, PrintOptions, Info);
isl_id_free(Id);
return Printer;
}
// Allocate an AstNodeInfo structure and initialize it with default values.
-static struct AstNodeUserInfo *allocateAstNodeUserInfo() {
- struct AstNodeUserInfo *NodeInfo;
- NodeInfo = (struct AstNodeUserInfo *) malloc(sizeof(struct AstNodeUserInfo));
+static struct IslAstUser *allocateIslAstUser() {
+ struct IslAstUser *NodeInfo;
+ NodeInfo = (struct IslAstUser *) malloc(sizeof(struct IslAstUser));
+ NodeInfo->PMA = 0;
+ NodeInfo->Context = 0;
NodeInfo->IsOutermostParallel = 0;
NodeInfo->IsInnermostParallel = 0;
return NodeInfo;
}
// Free the AstNodeInfo structure.
-static void freeAstNodeUserInfo(void *Ptr) {
- struct AstNodeUserInfo *Info;
- Info = (struct AstNodeUserInfo *) Ptr;
- free(Info);
+static void freeIslAstUser(void *Ptr) {
+ struct IslAstUser *UserStruct = (struct IslAstUser *) Ptr;
+ isl_ast_build_free(UserStruct->Context);
+ isl_pw_multi_aff_free(UserStruct->PMA);
+ free(UserStruct);
}
// Check if the current scheduling dimension is parallel.
@@ -200,7 +185,7 @@
// Mark a for node openmp parallel, if it is the outermost parallel for node.
static void markOpenmpParallel(__isl_keep isl_ast_build *Build,
struct AstBuildUserInfo *BuildInfo,
- struct AstNodeUserInfo *NodeInfo) {
+ struct IslAstUser *NodeInfo) {
if (BuildInfo->InParallelFor)
return;
@@ -219,14 +204,10 @@
//
static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
void *User) {
- isl_id *Id;
- struct AstBuildUserInfo *BuildInfo;
- struct AstNodeUserInfo *NodeInfo;
-
- BuildInfo = (struct AstBuildUserInfo *) User;
- NodeInfo = allocateAstNodeUserInfo();
- Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo);
- Id = isl_id_set_free_user(Id, freeAstNodeUserInfo);
+ struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User;
+ struct IslAstUser *NodeInfo = allocateIslAstUser();
+ isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", NodeInfo);
+ Id = isl_id_set_free_user(Id, freeIslAstUser);
markOpenmpParallel(Build, BuildInfo, NodeInfo);
@@ -286,7 +267,7 @@
isl_id *Id = isl_ast_node_get_annotation(Node);
if (!Id)
return Node;
- struct AstNodeUserInfo *Info = (struct AstNodeUserInfo *) isl_id_get_user(Id);
+ struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Id);
struct AstBuildUserInfo *BuildInfo = (struct AstBuildUserInfo *) User;
if (Info) {
if (Info->IsOutermostParallel)
@@ -296,28 +277,36 @@
Info->IsInnermostParallel = 1;
}
- isl_id_free(Id);
+ if (!Info->Context)
+ Info->Context = isl_ast_build_copy(Build);
+ isl_id_free(Id);
return Node;
}
static __isl_give isl_ast_node *
-AtEachDomain(__isl_keep isl_ast_node *Node,
+AtEachDomain(__isl_take isl_ast_node *Node,
__isl_keep isl_ast_build *Context, void *User)
{
- isl_map *Map;
- struct IslAstUser *UserStruct;
+ struct IslAstUser *Info = NULL;
+ isl_id *Id = isl_ast_node_get_annotation(Node);
+
+ if (Id)
+ Info = (struct IslAstUser *) isl_id_get_user(Id);
+
+ if (!Info) {
+ // Allocate annotations once: parallel for detection might have already
+ // allocated the annotations for this node.
+ Info = allocateIslAstUser();
+ Id = isl_id_alloc(isl_ast_node_get_ctx(Node), NULL, Info);
+ Id = isl_id_set_free_user(Id, &freeIslAstUser);
+ }
- UserStruct = (struct IslAstUser *) malloc(sizeof(struct IslAstUser));
+ isl_map *Map = isl_map_from_union_map(isl_ast_build_get_schedule(Context));
+ Info->PMA = isl_pw_multi_aff_from_map(isl_map_reverse(Map));
+ Info->Context = isl_ast_build_copy(Context);
- Map = isl_map_from_union_map(isl_ast_build_get_schedule(Context));
- UserStruct->PMA = isl_pw_multi_aff_from_map(isl_map_reverse(Map));
- UserStruct->Context = isl_ast_build_copy(Context);
-
- isl_id *Annotation = isl_id_alloc(isl_ast_node_get_ctx(Node), NULL,
- UserStruct);
- Annotation = isl_id_set_free_user(Annotation, &IslAstUserFree);
- return isl_ast_node_set_annotation(Node, Annotation);
+ return isl_ast_node_set_annotation(Node, Id);
}
IslAst::IslAst(Scop *Scop, Dependences &D) : S(Scop) {
@@ -343,7 +332,7 @@
isl_union_map_dump(Schedule);
);
- if (DetectParallel) {
+ if (DetectParallel || PollyVectorizerChoice != VECTORIZER_NONE) {
BuildInfo.Deps = &D;
BuildInfo.InParallelFor = 0;
Modified: polly/trunk/lib/CodeGen/IslCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslCodeGeneration.cpp?rev=170420&r1=170419&r2=170420&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/IslCodeGeneration.cpp Tue Dec 18 01:46:13 2012
@@ -26,6 +26,7 @@
#include "polly/TempScopInfo.h"
#include "polly/CodeGen/IslAst.h"
#include "polly/CodeGen/BlockGenerators.h"
+#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/LoopGenerators.h"
#include "polly/CodeGen/Utils.h"
#include "polly/Support/GICHelper.h"
@@ -579,8 +580,23 @@
__isl_give isl_ast_expr *getUpperBound(__isl_keep isl_ast_node *For,
CmpInst::Predicate &Predicate);
+ unsigned getNumberOfIterations(__isl_keep isl_ast_node *For);
+
void createFor(__isl_take isl_ast_node *For);
+ void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
+ void createForSequential(__isl_take isl_ast_node *For);
+ void createSubstitutions(__isl_take isl_pw_multi_aff *PMA,
+ __isl_take isl_ast_build *Context,
+ ScopStmt *Stmt, ValueMapT &VMap);
+ void createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA,
+ __isl_take isl_ast_build *Context,
+ ScopStmt *Stmt, VectorValueMapT &VMap,
+ std::vector<Value*> &IVS,
+ __isl_take isl_id *IteratorID);
void createIf(__isl_take isl_ast_node *If);
+ void createUserVector(__isl_take isl_ast_node *User,
+ std::vector<Value*> &IVS, __isl_take isl_id *IteratorID,
+ __isl_take isl_union_map *Schedule);
void createUser(__isl_take isl_ast_node *User);
void createBlock(__isl_take isl_ast_node *Block);
};
@@ -635,7 +651,128 @@
return UB;
}
-void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
+unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) {
+ isl_id *Annotation = isl_ast_node_get_annotation(For);
+ if (!Annotation)
+ return -1;
+
+ struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation);
+ if (!Info) {
+ isl_id_free(Annotation);
+ return -1;
+ }
+
+ isl_union_map *Schedule = isl_ast_build_get_schedule(Info->Context);
+ isl_set *LoopDomain = isl_set_from_union_set(isl_union_map_range(Schedule));
+ isl_id_free(Annotation);
+ return polly::getNumberOfIterations(LoopDomain) + 1;
+}
+
+void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
+ std::vector<Value*> &IVS,
+ __isl_take isl_id *IteratorID,
+ __isl_take isl_union_map *Schedule) {
+ isl_id *Annotation = isl_ast_node_get_annotation(User);
+ assert(Annotation && "Vector user statement is not annotated");
+
+ struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation);
+ assert(Info && "Vector user statement annotation does not contain info");
+
+ isl_id *Id = isl_pw_multi_aff_get_tuple_id(Info->PMA, isl_dim_out);
+ ScopStmt *Stmt = (ScopStmt *) isl_id_get_user(Id);
+ VectorValueMapT VectorMap(IVS.size());
+
+ isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain());
+ Schedule = isl_union_map_intersect_domain(Schedule, Domain);
+ isl_map *S = isl_map_from_union_map(Schedule);
+
+ createSubstitutionsVector(isl_pw_multi_aff_copy(Info->PMA),
+ isl_ast_build_copy(Info->Context),
+ Stmt, VectorMap, IVS, IteratorID);
+ VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, S, P);
+
+
+ isl_map_free(S);
+ isl_id_free(Annotation);
+ isl_id_free(Id);
+ isl_ast_node_free(User);
+}
+
+void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
+ int VectorWidth) {
+ isl_ast_node *Body = isl_ast_node_for_get_body(For);
+ isl_ast_expr *Init = isl_ast_node_for_get_init(For);
+ isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
+ isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
+ isl_id *IteratorID = isl_ast_expr_get_id(Iterator);
+ CmpInst::Predicate Predicate;
+ isl_ast_expr *UB = getUpperBound(For, Predicate);
+
+ Value *ValueLB = ExprBuilder.create(Init);
+ Value *ValueUB = ExprBuilder.create(UB);
+ Value *ValueInc = ExprBuilder.create(Inc);
+
+ Type *MaxType = ExprBuilder.getType(Iterator);
+ MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
+ MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType());
+ MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
+
+ if (MaxType != ValueLB->getType())
+ ValueLB = Builder.CreateSExt(ValueLB, MaxType);
+ if (MaxType != ValueUB->getType())
+ ValueUB = Builder.CreateSExt(ValueUB, MaxType);
+ if (MaxType != ValueInc->getType())
+ ValueInc = Builder.CreateSExt(ValueInc, MaxType);
+
+ std::vector<Value*> IVS(VectorWidth);
+ IVS[0] = ValueLB;
+
+ for (int i = 1; i < VectorWidth; i++)
+ IVS[i] = Builder.CreateAdd(IVS[i-1], ValueInc, "p_vector_iv");
+
+ isl_id *Annotation = isl_ast_node_get_annotation(For);
+ assert(Annotation && "For statement is not annotated");
+
+ struct IslAstUser *Info = (struct IslAstUser *) isl_id_get_user(Annotation);
+ assert(Info && "For statement annotation does not contain info");
+
+ isl_union_map *Schedule = isl_ast_build_get_schedule(Info->Context);
+ assert(Schedule && "For statement annotation does not contain its schedule");
+
+ IDToValue[IteratorID] = ValueLB;
+
+ switch (isl_ast_node_get_type(Body)) {
+ case isl_ast_node_user:
+ createUserVector(Body, IVS, isl_id_copy(IteratorID),
+ isl_union_map_copy(Schedule));
+ break;
+ case isl_ast_node_block: {
+ isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
+
+ for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i)
+ createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS,
+ isl_id_copy(IteratorID),
+ isl_union_map_copy(Schedule));
+
+ isl_ast_node_free(Body);
+ isl_ast_node_list_free(List);
+ break;
+ }
+ default:
+ isl_ast_node_dump(Body);
+ llvm_unreachable("Unhandled isl_ast_node in vectorizer");
+ }
+
+ IDToValue.erase(IteratorID);
+ isl_id_free(IteratorID);
+ isl_id_free(Annotation);
+ isl_union_map_free(Schedule);
+
+ isl_ast_node_free(For);
+ isl_ast_expr_free(Iterator);
+}
+
+void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {
isl_ast_node *Body;
isl_ast_expr *Init, *Inc, *Iterator, *UB;
isl_id *IteratorID;
@@ -696,6 +833,19 @@
isl_id_free(IteratorID);
}
+void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
+ bool Vector = PollyVectorizerChoice != VECTORIZER_NONE;
+
+ if (Vector && isInnermostParallel(For)) {
+ int VectorWidth = getNumberOfIterations(For);
+ if (1 < VectorWidth && VectorWidth <= 16) {
+ createForVector(For, VectorWidth);
+ return;
+ }
+ }
+ createForSequential(For);
+}
+
void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
isl_ast_expr *Cond = isl_ast_node_if_get_cond(If);
@@ -738,26 +888,18 @@
isl_ast_node_free(If);
}
-void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
- ValueMapT VMap;
- struct IslAstUser *UserInfo;
- isl_id *Annotation, *Id;
- ScopStmt *Stmt;
-
- Annotation = isl_ast_node_get_annotation(User);
- UserInfo = (struct IslAstUser *) isl_id_get_user(Annotation);
- Id = isl_pw_multi_aff_get_tuple_id(UserInfo->PMA, isl_dim_out);
- Stmt = (ScopStmt *) isl_id_get_user(Id);
-
- for (unsigned i = 0; i < isl_pw_multi_aff_dim(UserInfo->PMA, isl_dim_out);
+void IslNodeBuilder::createSubstitutions(__isl_take isl_pw_multi_aff *PMA,
+ __isl_take isl_ast_build *Context,
+ ScopStmt *Stmt, ValueMapT &VMap) {
+ for (unsigned i = 0; i < isl_pw_multi_aff_dim(PMA, isl_dim_out);
++i) {
isl_pw_aff *Aff;
isl_ast_expr *Expr;
const Value *OldIV;
Value *V;
- Aff = isl_pw_multi_aff_get_pw_aff(UserInfo->PMA, i);
- Expr = isl_ast_build_expr_from_pw_aff(UserInfo->Context, Aff);
+ Aff = isl_pw_multi_aff_get_pw_aff(PMA, i);
+ Expr = isl_ast_build_expr_from_pw_aff(Context, Aff);
OldIV = Stmt->getInductionVariableForDimension(i);
V = ExprBuilder.create(Expr);
@@ -768,6 +910,48 @@
VMap[OldIV] = V;
}
+ isl_pw_multi_aff_free(PMA);
+ isl_ast_build_free(Context);
+}
+
+void IslNodeBuilder::createSubstitutionsVector(__isl_take isl_pw_multi_aff *PMA,
+ __isl_take isl_ast_build *Context, ScopStmt *Stmt, VectorValueMapT &VMap,
+ std::vector<Value*> &IVS, __isl_take isl_id *IteratorID) {
+ int i = 0;
+
+ Value *OldValue = IDToValue[IteratorID];
+ for (std::vector<Value*>::iterator II = IVS.begin(), IE = IVS.end();
+ II != IE; ++II) {
+ IDToValue[IteratorID] = *II;
+ createSubstitutions(isl_pw_multi_aff_copy(PMA),
+ isl_ast_build_copy(Context), Stmt, VMap[i]);
+ i++;
+ }
+
+ IDToValue[IteratorID] = OldValue;
+ isl_id_free(IteratorID);
+ isl_pw_multi_aff_free(PMA);
+ isl_ast_build_free(Context);
+}
+
+void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) {
+ ValueMapT VMap;
+ struct IslAstUser *Info;
+ isl_id *Annotation, *Id;
+ ScopStmt *Stmt;
+
+ Annotation = isl_ast_node_get_annotation(User);
+ assert(Annotation && "Scalar user statement is not annotated");
+
+ Info = (struct IslAstUser *) isl_id_get_user(Annotation);
+ assert(Info && "Scalar user statement annotation does not contain info");
+
+ Id = isl_pw_multi_aff_get_tuple_id(Info->PMA, isl_dim_out);
+ Stmt = (ScopStmt *) isl_id_get_user(Id);
+
+ createSubstitutions(isl_pw_multi_aff_copy(Info->PMA),
+ isl_ast_build_copy(Info->Context), Stmt, VMap);
+
BlockGenerator::generate(Builder, *Stmt, VMap, P);
isl_ast_node_free(User);
Added: polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,65 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+
+;#define N 1024
+;float A[N];
+;float B[N];
+;
+;void simple_vec_const(void) {
+; int i;
+;
+; for (i = 0; i < 4; i++)
+; B[i] = A[i] + 1;
+;}
+;int main()
+;{
+; simple_vec_const();
+; return A[42];
+;}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_const() nounwind {
+bb:
+ br label %bb2
+
+bb2: ; preds = %bb5, %bb
+ %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ]
+ %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+ %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+ %exitcond = icmp ne i64 %indvar, 4
+ br i1 %exitcond, label %bb3, label %bb6
+
+bb3: ; preds = %bb2
+ %tmp = load float* %scevgep1, align 4
+ %tmp4 = fadd float %tmp, 1.000000e+00
+ store float %tmp4, float* %scevgep, align 4
+ br label %bb5
+
+bb5: ; preds = %bb3
+ %indvar.next = add i64 %indvar, 1
+ br label %bb2
+
+bb6: ; preds = %bb2
+ ret void
+}
+
+define i32 @main() nounwind {
+bb:
+ call void @simple_vec_const()
+ %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+ %tmp1 = fptosi float %tmp to i32
+ ret i32 %tmp1
+}
+
+; CHECK: %p_scevgep1.moved.to.bb3 = getelementptr [1024 x float]* @A, i64 0, i64 0
+; CHECK: %p_scevgep.moved.to.bb3 = getelementptr [1024 x float]* @B, i64 0, i64 0
+; CHECK: %vector_ptr = bitcast float* %p_scevgep1.moved.to.bb3 to <4 x float>*
+; CHECK: %tmp_p_vec_full = load <4 x float>* %vector_ptr, align 8
+; CHECK: %tmp4p_vec = fadd <4 x float> %tmp_p_vec_full, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK: %vector_ptr7 = bitcast float* %p_scevgep.moved.to.bb3 to <4 x float>*
+; CHECK: store <4 x float> %tmp4p_vec, <4 x float>* %vector_ptr7, align 8
+
Added: polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_assign_scalar_2.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,66 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+
+;#define N 1024
+;float A[N];
+;float B[N];
+;
+;void simple_vec_const(void) {
+; int i;
+;
+; for (i = 0; i < 4; i++)
+; B[i] = A[i] + i;
+;}
+;int main()
+;{
+; simple_vec_const();
+; return A[42];
+;}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_const() nounwind {
+bb:
+ br label %bb2
+
+bb2: ; preds = %bb6, %bb
+ %indvar = phi i64 [ %indvar.next, %bb6 ], [ 0, %bb ]
+ %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+ %i.0 = trunc i64 %indvar to i32
+ %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+ %exitcond = icmp ne i64 %indvar, 4
+ br i1 %exitcond, label %bb3, label %bb7
+
+bb3: ; preds = %bb2
+ %tmp = load float* %scevgep1, align 4
+ %tmp4 = sitofp i32 %i.0 to float
+ %tmp5 = fadd float %tmp, %tmp4
+ store float %tmp5, float* %scevgep, align 4
+ br label %bb6
+
+bb6: ; preds = %bb3
+ %indvar.next = add i64 %indvar, 1
+ br label %bb2
+
+bb7: ; preds = %bb2
+ ret void
+}
+
+define i32 @main() nounwind {
+bb:
+ call void @simple_vec_const()
+ %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+ %tmp1 = fptosi float %tmp to i32
+ ret i32 %tmp1
+}
+
+
+; CHECK: insertelement <4 x float> undef, float %{{[^,]+}}, i32 0
+; CHECK: insertelement <4 x float> %0, float %{{[^,]+}}, i32 1
+; CHECK: insertelement <4 x float> %1, float %{{[^,]+}}, i32 2
+; CHECK: insertelement <4 x float> %2, float %{{[^,]+}}, i32 3
+; CHECK: fadd <4 x float> %tmp_p_vec_full, %3
+
Added: polly/trunk/test/Isl/CodeGen/simple_vec_call.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_call.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_call.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_call.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,42 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+declare float @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+ br label %body
+
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+ %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+ %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+ %result = tail call float @foo(float %value) nounwind
+ store float %result, float* %scevgep, align 4
+ %indvar_next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar_next, 4
+ br i1 %exitcond, label %return, label %body
+
+return:
+ ret void
+}
+
+; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %0) nounwind
+; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %1) nounwind
+; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %2) nounwind
+; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %3) nounwind
+; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0
+; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1
+; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2
+; CHECK: %7 = insertelement <4 x float> %6, float [[RES4]], i32 3
+; CHECK: store <4 x float> %7
Added: polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,61 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -polly-codegen-scev=false -dce -S < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -polly-codegen-scev=true -dce -S < %s | FileCheck %s -check-prefix=CHECK-SCEV
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float**] zeroinitializer, align 16
+
+declare float** @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+ br label %body
+
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+ %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
+ %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+ %result = tail call float** @foo(float %value) nounwind
+ store float** %result, float*** %scevgep, align 4
+ %indvar_next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar_next, 4
+ br i1 %exitcond, label %return, label %body
+
+return:
+ ret void
+}
+
+; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
+; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) nounwind
+; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) nounwind
+; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) nounwind
+; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) nounwind
+; CHECK: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0
+; CHECK: %5 = insertelement <4 x float**> %4, float** %p_result4, i32 1
+; CHECK: %6 = insertelement <4 x float**> %5, float** %p_result5, i32 2
+; CHECK: %7 = insertelement <4 x float**> %6, float** %p_result6, i32 3
+; CHECK: %vector_ptr = bitcast float*** %p_scevgep to <4 x float**>*
+; CHECK: store <4 x float**> %7, <4 x float**>* %vector_ptr, align 8
+
+; CHECK-SCEV: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK-SCEV: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK-SCEV: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK-SCEV: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK-SCEV: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK-SCEV: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK-SCEV: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) nounwind
+; CHECK-SCEV: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) nounwind
+; CHECK-SCEV: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) nounwind
+; CHECK-SCEV: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) nounwind
+; CHECK-SCEV: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0
+; CHECK-SCEV: %5 = insertelement <4 x float**> %4, float** %p_result1, i32 1
+; CHECK-SCEV: %6 = insertelement <4 x float**> %5, float** %p_result2, i32 2
+; CHECK-SCEV: %7 = insertelement <4 x float**> %6, float** %p_result3, i32 3
+; CHECK-SCEV: store <4 x float**> %7, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align
Added: polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x double] zeroinitializer, align 16
+
+define void @simple_vec_const() nounwind {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb3, %bb
+ %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ]
+ %scevgep = getelementptr [1024 x double]* @B, i64 0, i64 %indvar
+ %exitcond = icmp ne i64 %indvar, 4
+ br i1 %exitcond, label %bb2, label %bb4
+
+bb2: ; preds = %bb1
+ %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+ %tmp2 = fpext float %tmp to double
+ store double %tmp2, double* %scevgep, align 4
+ br label %bb3
+
+bb3: ; preds = %bb2
+ %indvar.next = add i64 %indvar, 1
+ br label %bb1
+
+bb4: ; preds = %bb1
+ ret void
+}
+
+; CHECK: %p_scevgep.moved.to.bb2 = getelementptr [1024 x double]* @B, i64 0, i64 0
+; CHECK: %tmp_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %tmp_p_splat = shufflevector <1 x float> %tmp_p_splat_one, <1 x float> %tmp_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = fpext <4 x float> %tmp_p_splat to <4 x double>
+; CHECK: %vector_ptr = bitcast double* %p_scevgep.moved.to.bb2 to <4 x double>*
+; CHECK: store <4 x double> %0, <4 x double>* %vector_ptr, align 8
+
Added: polly/trunk/test/Isl/CodeGen/simple_vec_const.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_const.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_const.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_const.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,57 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -S < %s | FileCheck %s
+
+;#define N 1024
+;float A[N];
+;float B[N];
+;
+;void simple_vec_const(void) {
+; int i;
+;
+; for (i = 0; i < 4; i++)
+; B[i] = A[0];
+;}
+;int main()
+;{
+; simple_vec_const();
+; return A[42];
+;}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_const() nounwind {
+; <label>:0
+ br label %1
+
+; <label>:1 ; preds = %4, %0
+ %indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ]
+ %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+ %exitcond = icmp ne i64 %indvar, 4
+ br i1 %exitcond, label %2, label %5
+
+; <label>:2 ; preds = %1
+ %3 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+ store float %3, float* %scevgep, align 4
+ br label %4
+
+; <label>:4 ; preds = %2
+ %indvar.next = add i64 %indvar, 1
+ br label %1
+
+; <label>:5 ; preds = %1
+ ret void
+}
+
+define i32 @main() nounwind {
+ call void @simple_vec_const()
+ %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+ %2 = fptosi float %1 to i32
+ ret i32 %2
+}
+
+
+; CHECK: load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*)
+; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer
Added: polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_large_width.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,40 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_large_width() nounwind {
+; <label>:0
+ br label %1
+
+; <label>:1 ; preds = %4, %0
+ %indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ]
+ %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+ %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+ %exitcond = icmp ne i64 %indvar, 15
+ br i1 %exitcond, label %2, label %5
+
+; <label>:2 ; preds = %1
+ %3 = load float* %scevgep1, align 4
+ store float %3, float* %scevgep, align 4
+ br label %4
+
+; <label>:4 ; preds = %2
+ %indvar.next = add i64 %indvar, 1
+ br label %1
+
+; <label>:5 ; preds = %1
+ ret void
+}
+
+define i32 @main() nounwind {
+ call void @simple_vec_large_width()
+ %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+ %2 = fptosi float %1 to i32
+ ret i32 %2
+}
+
+; CHECK: [[VEC1:%[a-zA-Z0-9_]+_full]] = load <15 x float>*
+; CHECK: store <15 x float> [[VEC1]]
Added: polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,40 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -S -polly-codegen-scev=false < %s | FileCheck %s
+; RUN: opt %loadPolly -basicaa -polly-codegen-isl %vector-opt -S -polly-codegen-scev=true < %s | FileCheck %s -check-prefix=CHECK-SCEV
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float**] zeroinitializer, align 16
+@B = common global [1024 x float**] zeroinitializer, align 16
+
+declare float @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+ br label %body
+
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+ %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
+ %value = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0), align 16
+ store float** %value, float*** %scevgep, align 4
+ %indvar_next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar_next, 4
+ br i1 %exitcond, label %return, label %body
+
+return:
+ ret void
+}
+
+; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
+; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
+; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
+; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
+; CHECK: %value_p_splat_one = load <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %vector_ptr = bitcast float*** %p_scevgep to <4 x float**>*
+; CHECK: store <4 x float**> %value_p_splat, <4 x float**>* %vector_ptr
+
+
+; CHECK-SCEV: %value_p_splat_one = load <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8
+; CHECK-SCEV: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK-SCEV: store <4 x float**> %value_p_splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8
Added: polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_stride_x.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,73 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+
+;#define N 1024
+;float A[N];
+;float B[N];
+;
+;void simple_vec_stride_x(void) {
+; int i;
+;
+; for (i = 0; i < 4; i++)
+; B[2 * i] = A[2 * i];
+;}
+;int main()
+;{
+; simple_vec_stride_x();
+; return A[42];
+;}
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_stride_x() nounwind {
+bb:
+ br label %bb2
+
+bb2: ; preds = %bb5, %bb
+ %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ]
+ %tmp = mul i64 %indvar, 2
+ %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %tmp
+ %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %tmp
+ %exitcond = icmp ne i64 %indvar, 4
+ br i1 %exitcond, label %bb3, label %bb6
+
+bb3: ; preds = %bb2
+ %tmp4 = load float* %scevgep1, align 8
+ store float %tmp4, float* %scevgep, align 8
+ br label %bb5
+
+bb5: ; preds = %bb3
+ %indvar.next = add i64 %indvar, 1
+ br label %bb2
+
+bb6: ; preds = %bb2
+ ret void
+}
+
+define i32 @main() nounwind {
+bb:
+ call void @simple_vec_stride_x()
+ %tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+ %tmp1 = fptosi float %tmp to i32
+ ret i32 %tmp1
+}
+
+; CHECK: [[LOAD1:%[a-zA-Z0-9_]+_scalar_]] = load float*
+; CHECK: [[VEC1:%[a-zA-Z0-9_]+]] = insertelement <4 x float> undef, float [[LOAD1]], i32 0
+; CHECK: [[LOAD2:%[a-zA-Z0-9_]+]] = load float*
+; CHECK: [[VEC2:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC1]], float [[LOAD2]], i32 1
+; CHECK: [[LOAD3:%[a-zA-Z0-9_]+]] = load float*
+; CHECK: [[VEC3:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC2]], float [[LOAD3]], i32 2
+; CHECK: [[LOAD4:%[a-zA-Z0-9_]+]] = load float*
+; CHECK: [[VEC4:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC3]], float [[LOAD4]], i32 3
+; CHECK: [[EL1:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 0
+; CHECK: store float [[EL1]]
+; CHECK: [[EL2:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 1
+; CHECK: store float [[EL2]]
+; CHECK: [[EL3:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 2
+; CHECK: store float [[EL3]]
+; CHECK: [[EL4:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 3
+; CHECK: store float [[EL4]]
Added: polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll?rev=170420&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll (added)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_two_stmts.ll Tue Dec 18 01:46:13 2012
@@ -0,0 +1,50 @@
+; RUN: opt %loadPolly %defaultOpts -polly-codegen-isl %vector-opt -dce -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+@C = common global [1024 x float] zeroinitializer, align 16
+
+define void @simple_vec_stride_one() nounwind {
+bb0:
+ br label %bb1
+
+bb1:
+ %indvar = phi i64 [ %indvar.next, %bb4 ], [ 0, %bb0 ]
+ %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+ %scevgep2 = getelementptr [1024 x float]* @C, i64 0, i64 %indvar
+ %scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
+ %exitcond = icmp ne i64 %indvar, 4
+ br i1 %exitcond, label %bb2a, label %bb5
+
+bb2a:
+ %tmp1 = load float* %scevgep1, align 4
+ store float %tmp1, float* %scevgep, align 4
+ br label %bb2b
+
+bb2b:
+ %tmp2 = load float* %scevgep1, align 4
+ store float %tmp2, float* %scevgep2, align 4
+ br label %bb4
+
+bb4:
+ %indvar.next = add i64 %indvar, 1
+ br label %bb1
+
+bb5:
+ ret void
+}
+
+define i32 @main() nounwind {
+ call void @simple_vec_stride_one()
+ %1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
+ %2 = fptosi float %1 to i32
+ ret i32 %2
+}
+
+; CHECK: [[LOAD1:%[a-zA-Z0-9_]+_full]] = load <4 x float>*
+; CHECK: store <4 x float> [[LOAD1]]
+; CHECK: [[LOAD2:%[a-zA-Z0-9_]+_full]] = load <4 x float>*
+; CHECK: store <4 x float> [[LOAD2]]
+
More information about the llvm-commits
mailing list