[polly] 42cd38c - [Polly] Remove -polly-vectorizer=polly.

Wed Mar 8 10:54:46 PST 2023

Author: Michael Kruse
Date: 2023-03-08T12:51:42-06:00
New Revision: 42cd38c01e5b76357e77b8c0f4f32d77a97d153f

URL: https://github.com/llvm/llvm-project/commit/42cd38c01e5b76357e77b8c0f4f32d77a97d153f
DIFF: https://github.com/llvm/llvm-project/commit/42cd38c01e5b76357e77b8c0f4f32d77a97d153f.diff

LOG: [Polly] Remove -polly-vectorizer=polly.

Polly's internal vectorizer is not well maintained and is known to not work in some cases, such as region ScopStmts. Unlike LLVM's LoopVectorize pass, it also does not have target-dependent cost heuristics, and we recommend using LoopVectorize instead of -polly-vectorizer=polly.

In the future we hope that Polly can collaborate better with LoopVectorize, for example by marking a loop as safe to vectorize with a specific SIMD width, instead of replicating its functionality.

Reviewed By: grosser

Differential Revision: https://reviews.llvm.org/D142640

Added: 
    

Modified: 
    polly/docs/ReleaseNotes.rst
    polly/include/polly/CodeGen/BlockGenerators.h
    polly/include/polly/CodeGen/CodeGeneration.h
    polly/include/polly/CodeGen/IslNodeBuilder.h
    polly/lib/CodeGen/BlockGenerators.cpp
    polly/lib/CodeGen/IslNodeBuilder.cpp
    polly/lib/Support/RegisterPasses.cpp
    polly/test/CodeGen/MemAccess/simple_analyze.ll
    polly/test/CodeGen/OpenMP/scev-rewriting.ll
    polly/test/ScheduleOptimizer/prevectorization-without-tiling.ll
    polly/test/ScheduleOptimizer/prevectorization.ll
    polly/test/ScheduleOptimizer/rectangular-tiling.ll

Removed: 
    polly/test/CodeGen/20130211-getNumberOfIterations.ll
    polly/test/CodeGen/MemAccess/simple_stride_test.ll
    polly/test/CodeGen/getNumberOfIterations.ll
    polly/test/CodeGen/if-conditions-in-vector-code.ll
    polly/test/CodeGen/invariant_load_hoist_alignment.ll
    polly/test/CodeGen/partial_write_mapped_vector.ll
    polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop
    polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop.transformed
    polly/test/CodeGen/simple_vec_assign_scalar.ll
    polly/test/CodeGen/simple_vec_assign_scalar_2.ll
    polly/test/CodeGen/simple_vec_call.ll
    polly/test/CodeGen/simple_vec_call_2.ll
    polly/test/CodeGen/simple_vec_cast.ll
    polly/test/CodeGen/simple_vec_const.ll
    polly/test/CodeGen/simple_vec_large_width.ll
    polly/test/CodeGen/simple_vec_ptr_ptr_ty.ll
    polly/test/CodeGen/simple_vec_stride_negative_one.ll
    polly/test/CodeGen/simple_vec_stride_one.ll
    polly/test/CodeGen/simple_vec_stride_x.ll
    polly/test/CodeGen/simple_vec_strides_multidim.ll
    polly/test/CodeGen/simple_vec_two_stmts.ll
    polly/test/CodeGen/stride_detection.ll
    polly/test/ScheduleOptimizer/2012-04-16-Trivially-vectorizable-loops.ll
    polly/test/ScheduleOptimizer/vec-addr-space.ll


################################################################################
diff  --git a/polly/docs/ReleaseNotes.rst b/polly/docs/ReleaseNotes.rst
index bf6f7b9277d3f..f29f7a90abfad 100644

--- a/polly/docs/ReleaseNotes.rst
+++ b/polly/docs/ReleaseNotes.rst
@@ -10,3 +10,14 @@ In Polly |version| the following important changes have been incorporated.
     These release notes are for the next release of Polly and describe
     the new features that have recently been committed to our development
     branch.
+
+
+- Support for -polly-vectorizer=polly has been removed. Polly's internal
+  vectorizer is not well maintained and is known to not work in some cases
+  such as region ScopStmts. Unlike LLVM's LoopVectorize pass it also does
+  not have a target-dependent cost heuristics, and we recommend using
+  LoopVectorize instead of -polly-vectorizer=polly.
+
+  In the future we hope that Polly can collaborate better with LoopVectorize,
+  like Polly marking a loop is safe to vectorize with a specific simd width,
+  instead of replicating its functionality.

diff  --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h
index a30eb8b4bb931..13c27328d8c7e 100644
--- a/polly/include/polly/CodeGen/BlockGenerators.h
+++ b/polly/include/polly/CodeGen/BlockGenerators.h
@@ -622,186 +622,6 @@ class BlockGenerator {
   void invalidateScalarEvolution(Scop &S);
 };
 
-/// Generate a new vector basic block for a polyhedral statement.
-///
-/// The only public function exposed is generate().
-class VectorBlockGenerator final : BlockGenerator {
-public:
-  /// Generate a new vector basic block for a ScoPStmt.
-  ///
-  /// This code generation is similar to the normal, scalar code generation,
-  /// except that each instruction is code generated for several vector lanes
-  /// at a time. If possible instructions are issued as actual vector
-  /// instructions, but e.g. for address calculation instructions we currently
-  /// generate scalar instructions for each vector lane.
-  ///
-  /// @param BlockGen    A block generator object used as parent.
-  /// @param Stmt        The statement to code generate.
-  /// @param VLTS        A mapping from loops virtual canonical induction
-  ///                    variable to their new values
-  ///                    (for values recalculated in the new ScoP, but not
-  ///                     within this basic block), one for each lane.
-  /// @param Schedule    A map from the statement to a schedule where the
-  ///                    innermost dimension is the dimension of the innermost
-  ///                    loop containing the statement.
-  /// @param NewAccesses A map from memory access ids to new ast expressions,
-  ///                    which may contain new access expressions for certain
-  ///                    memory accesses.
-  static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt,
-                       std::vector<LoopToScevMapT> &VLTS,
-                       __isl_keep isl_map *Schedule,
-                       __isl_keep isl_id_to_ast_expr *NewAccesses) {
-    VectorBlockGenerator Generator(BlockGen, VLTS, Schedule);
-    Generator.copyStmt(Stmt, NewAccesses);
-  }
-
-private:
-  // This is a vector of loop->scev maps.  The first map is used for the first
-  // vector lane, ...
-  // Each map, contains information about Instructions in the old ScoP, which
-  // are recalculated in the new SCoP. When copying the basic block, we replace
-  // all references to the old instructions with their recalculated values.
-  //
-  // For example, when the code generator produces this AST:
-  //
-  //   for (int c1 = 0; c1 <= 1023; c1 += 1)
-  //     for (int c2 = 0; c2 <= 1023; c2 += VF)
-  //       for (int lane = 0; lane <= VF; lane += 1)
-  //         Stmt(c2 + lane + 3, c1);
-  //
-  // VLTS[lane] contains a map:
-  //   "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"),
-  //   "inner loop in the old loop nest" -> SCEV("c1").
-  std::vector<LoopToScevMapT> &VLTS;
-
-  // A map from the statement to a schedule where the innermost dimension is the
-  // dimension of the innermost loop containing the statement.
-  isl_map *Schedule;
-
-  VectorBlockGenerator(BlockGenerator &BlockGen,
-                       std::vector<LoopToScevMapT> &VLTS,
-                       __isl_keep isl_map *Schedule);
-
-  int getVectorWidth();
-
-  Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
-                        VectorValueMapT &ScalarMaps, Loop *L);
-
-  /// Load a vector from a set of adjacent scalars
-  ///
-  /// In case a set of scalars is known to be next to each other in memory,
-  /// create a vector load that loads those scalars
-  ///
-  /// %vector_ptr= bitcast double* %p to <4 x double>*
-  /// %vec_full = load <4 x double>* %vector_ptr
-  ///
-  /// @param Stmt           The statement to code generate.
-  /// @param NegativeStride This is used to indicate a -1 stride. In such
-  ///                       a case we load the end of a base address and
-  ///                       shuffle the accesses in reverse order into the
-  ///                       vector. By default we would do only positive
-  ///                       strides.
-  ///
-  /// @param NewAccesses    A map from memory access ids to new ast
-  ///                       expressions, which may contain new access
-  ///                       expressions for certain memory accesses.
-  Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load,
-                               VectorValueMapT &ScalarMaps,
-                               __isl_keep isl_id_to_ast_expr *NewAccesses,
-                               bool NegativeStride);
-
-  /// Load a vector initialized from a single scalar in memory
-  ///
-  /// In case all elements of a vector are initialized to the same
-  /// scalar value, this value is loaded and shuffled into all elements
-  /// of the vector.
-  ///
-  /// %splat_one = load <1 x double>* %p
-  /// %splat = shufflevector <1 x double> %splat_one, <1 x
-  ///       double> %splat_one, <4 x i32> zeroinitializer
-  ///
-  /// @param NewAccesses A map from memory access ids to new ast expressions,
-  ///                    which may contain new access expressions for certain
-  ///                    memory accesses.
-  Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load,
-                                ValueMapT &BBMap,
-                                __isl_keep isl_id_to_ast_expr *NewAccesses);
-
-  /// Load a vector from scalars distributed in memory
-  ///
-  /// In case some scalars a distributed randomly in memory. Create a vector
-  /// by loading each scalar and by inserting one after the other into the
-  /// vector.
-  ///
-  /// %scalar_1= load double* %p_1
-  /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
-  /// %scalar 2 = load double* %p_2
-  /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1
-  ///
-  /// @param NewAccesses A map from memory access ids to new ast expressions,
-  ///                    which may contain new access expressions for certain
-  ///                    memory accesses.
-  Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load,
-                                   VectorValueMapT &ScalarMaps,
-                                   __isl_keep isl_id_to_ast_expr *NewAccesses);
-
-  /// @param NewAccesses A map from memory access ids to new ast expressions,
-  ///                    which may contain new access expressions for certain
-  ///                    memory accesses.
-  void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
-                    VectorValueMapT &ScalarMaps,
-                    __isl_keep isl_id_to_ast_expr *NewAccesses);
-
-  void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
-                     ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
-
-  void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
-                      ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
-
-  /// @param NewAccesses A map from memory access ids to new ast expressions,
-  ///                    which may contain new access expressions for certain
-  ///                    memory accesses.
-  void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
-                 VectorValueMapT &ScalarMaps,
-                 __isl_keep isl_id_to_ast_expr *NewAccesses);
-
-  /// @param NewAccesses A map from memory access ids to new ast expressions,
-  ///                    which may contain new access expressions for certain
-  ///                    memory accesses.
-  void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst,
-                          ValueMapT &VectorMap, VectorValueMapT &ScalarMaps,
-                          __isl_keep isl_id_to_ast_expr *NewAccesses);
-
-  bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
-                           VectorValueMapT &ScalarMaps);
-
-  bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
-
-  /// Generate vector loads for scalars.
-  ///
-  /// @param Stmt           The scop statement for which to generate the loads.
-  /// @param VectorBlockMap A map that will be updated to relate the original
-  ///                       values with the newly generated vector loads.
-  void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);
-
-  /// Verify absence of scalar stores.
-  ///
-  /// @param Stmt The scop statement to check for scalar stores.
-  void verifyNoScalarStores(ScopStmt &Stmt);
-
-  /// @param NewAccesses A map from memory access ids to new ast expressions,
-  ///                    which may contain new access expressions for certain
-  ///                    memory accesses.
-  void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
-                       VectorValueMapT &ScalarMaps,
-                       __isl_keep isl_id_to_ast_expr *NewAccesses);
-
-  /// @param NewAccesses A map from memory access ids to new ast expressions,
-  ///                    which may contain new access expressions for certain
-  ///                    memory accesses.
-  void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses);
-};
-
 /// Generator for new versions of polyhedral region statements.
 class RegionGenerator final : BlockGenerator {
 public:

diff  --git a/polly/include/polly/CodeGen/CodeGeneration.h b/polly/include/polly/CodeGen/CodeGeneration.h
index 77668cbbda754..57aec1d70cc72 100644
--- a/polly/include/polly/CodeGen/CodeGeneration.h
+++ b/polly/include/polly/CodeGen/CodeGeneration.h
@@ -18,7 +18,6 @@ namespace polly {
 enum VectorizerChoice {
   VECTORIZER_NONE,
   VECTORIZER_STRIPMINE,
-  VECTORIZER_POLLY,
 };
 extern VectorizerChoice PollyVectorizerChoice;
 

diff  --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h
index 2dc7f019e84cf..05f53d79d74a4 100644
--- a/polly/include/polly/CodeGen/IslNodeBuilder.h
+++ b/polly/include/polly/CodeGen/IslNodeBuilder.h
@@ -310,7 +310,6 @@ class IslNodeBuilder {
   /// @returns False, iff a problem occurred and the load was not preloaded.
   bool preloadInvariantEquivClass(InvariantEquivClassTy &IAClass);
 
-  void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
   void createForSequential(isl::ast_node_for For, bool MarkParallel);
 
   /// Create LLVM-IR that executes a for node thread parallel.
@@ -375,10 +374,6 @@ class IslNodeBuilder {
                                  std::vector<Value *> &IVS,
                                  __isl_take isl_id *IteratorID);
   virtual void createIf(__isl_take isl_ast_node *If);
-  void createUserVector(__isl_take isl_ast_node *User,
-                        std::vector<Value *> &IVS,
-                        __isl_take isl_id *IteratorID,
-                        __isl_take isl_union_map *Schedule);
   virtual void createUser(__isl_take isl_ast_node *User);
   virtual void createBlock(__isl_take isl_ast_node *Block);
 

diff  --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp
index 60d6b763dfb5e..5dbef232b2d3e 100644
--- a/polly/lib/CodeGen/BlockGenerators.cpp
+++ b/polly/lib/CodeGen/BlockGenerators.cpp
@@ -1009,391 +1009,6 @@ void BlockGenerator::finalizeSCoP(Scop &S) {
   invalidateScalarEvolution(S);
 }
 
-VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
-                                           std::vector<LoopToScevMapT> &VLTS,
-                                           isl_map *Schedule)
-    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
-  assert(Schedule && "No statement domain provided");
-}
-
-Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
-                                            ValueMapT &VectorMap,
-                                            VectorValueMapT &ScalarMaps,
-                                            Loop *L) {
-  if (Value *NewValue = VectorMap.lookup(Old))
-    return NewValue;
-
-  int Width = getVectorWidth();
-
-  Value *Vector = UndefValue::get(FixedVectorType::get(Old->getType(), Width));
-
-  for (int Lane = 0; Lane < Width; Lane++)
-    Vector = Builder.CreateInsertElement(
-        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
-        Builder.getInt32(Lane));
-
-  VectorMap[Old] = Vector;
-
-  return Vector;
-}
-
-Value *VectorBlockGenerator::generateStrideOneLoad(
-    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
-    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
-  unsigned VectorWidth = getVectorWidth();
-  Type *VectorType = FixedVectorType::get(Load->getType(), VectorWidth);
-  Type *VectorPtrType =
-      PointerType::get(VectorType, Load->getPointerAddressSpace());
-  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
-
-  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
-                                               VLTS[Offset], NewAccesses);
-  Value *VectorPtr =
-      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
-  LoadInst *VecLoad = Builder.CreateLoad(VectorType, VectorPtr,
-                                         Load->getName() + "_p_vec_full");
-  if (!Aligned)
-    VecLoad->setAlignment(Align(8));
-
-  if (NegativeStride) {
-    SmallVector<Constant *, 16> Indices;
-    for (int i = VectorWidth - 1; i >= 0; i--)
-      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
-    Constant *SV = llvm::ConstantVector::get(Indices);
-    Value *RevVecLoad = Builder.CreateShuffleVector(
-        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
-    return RevVecLoad;
-  }
-
-  return VecLoad;
-}
-
-Value *VectorBlockGenerator::generateStrideZeroLoad(
-    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
-    __isl_keep isl_id_to_ast_expr *NewAccesses) {
-  Type *VectorType = FixedVectorType::get(Load->getType(), 1);
-  Type *VectorPtrType =
-      PointerType::get(VectorType, Load->getPointerAddressSpace());
-  Value *NewPointer =
-      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
-  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
-                                           Load->getName() + "_p_vec_p");
-  LoadInst *ScalarLoad = Builder.CreateLoad(VectorType, VectorPtr,
-                                            Load->getName() + "_p_splat_one");
-
-  if (!Aligned)
-    ScalarLoad->setAlignment(Align(8));
-
-  Constant *SplatVector = Constant::getNullValue(
-      FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));
-
-  Value *VectorLoad = Builder.CreateShuffleVector(
-      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
-  return VectorLoad;
-}
-
-Value *VectorBlockGenerator::generateUnknownStrideLoad(
-    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
-    __isl_keep isl_id_to_ast_expr *NewAccesses) {
-  int VectorWidth = getVectorWidth();
-  Type *ElemTy = Load->getType();
-  auto *FVTy = FixedVectorType::get(ElemTy, VectorWidth);
-
-  Value *Vector = UndefValue::get(FVTy);
-
-  for (int i = 0; i < VectorWidth; i++) {
-    Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
-                                                 VLTS[i], NewAccesses);
-    Value *ScalarLoad =
-        Builder.CreateLoad(ElemTy, NewPointer, Load->getName() + "_p_scalar_");
-    Vector = Builder.CreateInsertElement(
-        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
-  }
-
-  return Vector;
-}
-
-void VectorBlockGenerator::generateLoad(
-    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
-    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
-  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
-    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
-                                                Load->getName() + "_p");
-    return;
-  }
-
-  if (!VectorType::isValidElementType(Load->getType())) {
-    for (int i = 0; i < getVectorWidth(); i++)
-      ScalarMaps[i][Load] =
-          generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
-    return;
-  }
-
-  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);
-
-  // Make sure we have scalar values available to access the pointer to
-  // the data location.
-  extractScalarValues(Load, VectorMap, ScalarMaps);
-
-  Value *NewLoad;
-  if (Access.isStrideZero(isl::manage_copy(Schedule)))
-    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
-  else if (Access.isStrideOne(isl::manage_copy(Schedule)))
-    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
-  else if (Access.isStrideX(isl::manage_copy(Schedule), -1))
-    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
-  else
-    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);
-
-  VectorMap[Load] = NewLoad;
-}
-
-void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
-                                         ValueMapT &VectorMap,
-                                         VectorValueMapT &ScalarMaps) {
-  int VectorWidth = getVectorWidth();
-  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
-                                     ScalarMaps, getLoopForStmt(Stmt));
-
-  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
-
-  const CastInst *Cast = dyn_cast<CastInst>(Inst);
-  auto *DestType = FixedVectorType::get(Inst->getType(), VectorWidth);
-  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
-}
-
-void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
-                                          ValueMapT &VectorMap,
-                                          VectorValueMapT &ScalarMaps) {
-  Loop *L = getLoopForStmt(Stmt);
-  Value *OpZero = Inst->getOperand(0);
-  Value *OpOne = Inst->getOperand(1);
-
-  Value *NewOpZero, *NewOpOne;
-  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
-  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);
-
-  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
-                                       Inst->getName() + "p_vec");
-  VectorMap[Inst] = NewInst;
-}
-
-void VectorBlockGenerator::copyStore(
-    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
-    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
-  const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);
-
-  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
-                                 ScalarMaps, getLoopForStmt(Stmt));
-
-  // Make sure we have scalar values available to access the pointer to
-  // the data location.
-  extractScalarValues(Store, VectorMap, ScalarMaps);
-
-  if (Access.isStrideOne(isl::manage_copy(Schedule))) {
-    Type *VectorType = FixedVectorType::get(Store->getValueOperand()->getType(),
-                                            getVectorWidth());
-    Type *VectorPtrType =
-        PointerType::get(VectorType, Store->getPointerAddressSpace());
-    Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
-                                                 VLTS[0], NewAccesses);
-
-    Value *VectorPtr =
-        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
-    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
-
-    if (!Aligned)
-      Store->setAlignment(Align(8));
-  } else {
-    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
-      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
-      Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
-                                                   VLTS[i], NewAccesses);
-      Builder.CreateStore(Scalar, NewPointer);
-    }
-  }
-}
-
-bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
-                                             ValueMapT &VectorMap) {
-  for (Value *Operand : Inst->operands())
-    if (VectorMap.count(Operand))
-      return true;
-  return false;
-}
-
-bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
-                                               ValueMapT &VectorMap,
-                                               VectorValueMapT &ScalarMaps) {
-  bool HasVectorOperand = false;
-  int VectorWidth = getVectorWidth();
-
-  for (Value *Operand : Inst->operands()) {
-    ValueMapT::iterator VecOp = VectorMap.find(Operand);
-
-    if (VecOp == VectorMap.end())
-      continue;
-
-    HasVectorOperand = true;
-    Value *NewVector = VecOp->second;
-
-    for (int i = 0; i < VectorWidth; ++i) {
-      ValueMapT &SM = ScalarMaps[i];
-
-      // If there is one scalar extracted, all scalar elements should have
-      // already been extracted by the code here. So no need to check for the
-      // existence of all of them.
-      if (SM.count(Operand))
-        break;
-
-      SM[Operand] =
-          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
-    }
-  }
-
-  return HasVectorOperand;
-}
-
-void VectorBlockGenerator::copyInstScalarized(
-    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
-    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
-  bool HasVectorOperand;
-  int VectorWidth = getVectorWidth();
-
-  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
-
-  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
-    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
-                                    VLTS[VectorLane], NewAccesses);
-
-  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
-    return;
-
-  // Make the result available as vector value.
-  auto *FVTy = FixedVectorType::get(Inst->getType(), VectorWidth);
-  Value *Vector = UndefValue::get(FVTy);
-
-  for (int i = 0; i < VectorWidth; i++)
-    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
-                                         Builder.getInt32(i));
-
-  VectorMap[Inst] = Vector;
-}
-
-int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }
-
-void VectorBlockGenerator::copyInstruction(
-    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
-    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
-  // Terminator instructions control the control flow. They are explicitly
-  // expressed in the clast and do not need to be copied.
-  if (Inst->isTerminator())
-    return;
-
-  if (canSyntheziseInStmt(Stmt, Inst))
-    return;
-
-  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
-    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
-    return;
-  }
-
-  if (hasVectorOperands(Inst, VectorMap)) {
-    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
-      // Identified as redundant by -polly-simplify.
-      if (!Stmt.getArrayAccessOrNULLFor(Store))
-        return;
-
-      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
-      return;
-    }
-
-    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
-      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
-      return;
-    }
-
-    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
-      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
-      return;
-    }
-
-    // Fallthrough: We generate scalar instructions, if we don't know how to
-    // generate vector code.
-  }
-
-  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
-}
-
-void VectorBlockGenerator::generateScalarVectorLoads(
-    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
-  for (MemoryAccess *MA : Stmt) {
-    if (MA->isArrayKind() || MA->isWrite())
-      continue;
-
-    auto *Address = getOrCreateAlloca(*MA);
-    Type *VectorType = FixedVectorType::get(MA->getElementType(), 1);
-    Type *VectorPtrType = PointerType::get(
-        VectorType, Address->getType()->getPointerAddressSpace());
-    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
-                                             Address->getName() + "_p_vec_p");
-    auto *Val = Builder.CreateLoad(VectorType, VectorPtr,
-                                   Address->getName() + ".reload");
-    Constant *SplatVector = Constant::getNullValue(
-        FixedVectorType::get(Builder.getInt32Ty(), getVectorWidth()));
-
-    Value *VectorVal = Builder.CreateShuffleVector(
-        Val, Val, SplatVector, Address->getName() + "_p_splat");
-    VectorBlockMap[MA->getAccessValue()] = VectorVal;
-  }
-}
-
-void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
-  for (MemoryAccess *MA : Stmt) {
-    if (MA->isArrayKind() || MA->isRead())
-      continue;
-
-    llvm_unreachable("Scalar stores not expected in vector loop");
-  }
-}
-
-void VectorBlockGenerator::copyStmt(
-    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
-  assert(Stmt.isBlockStmt() &&
-         "TODO: Only block statements can be copied by the vector block "
-         "generator");
-
-  BasicBlock *BB = Stmt.getBasicBlock();
-  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
-                                  &*Builder.GetInsertPoint(), &DT, &LI);
-  CopyBB->setName("polly.stmt." + BB->getName());
-  Builder.SetInsertPoint(&CopyBB->front());
-
-  // Create two maps that store the mapping from the original instructions of
-  // the old basic block to their copies in the new basic block. Those maps
-  // are basic block local.
-  //
-  // As vector code generation is supported there is one map for scalar values
-  // and one for vector values.
-  //
-  // In case we just do scalar code generation, the vectorMap is not used and
-  // the scalarMap has just one dimension, which contains the mapping.
-  //
-  // In case vector code generation is done, an instruction may either appear
-  // in the vector map once (as it is calculating >vectorwidth< values at a
-  // time. Or (if the values are calculated using scalar operations), it
-  // appears once in every dimension of the scalarMap.
-  VectorValueMapT ScalarBlockMap(getVectorWidth());
-  ValueMapT VectorBlockMap;
-
-  generateScalarVectorLoads(Stmt, VectorBlockMap);
-
-  for (Instruction *Inst : Stmt.getInstructions())
-    copyInstruction(Stmt, Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);
-
-  verifyNoScalarStores(Stmt);
-}
-
 BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                              BasicBlock *BBCopy) {
 

diff  --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index 843fa0e86a954..121d68ff5e268 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -390,30 +390,6 @@ Value *IslNodeBuilder::getLatestValue(Value *Original) const {
   return It->second;
 }
 
-void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
-                                      std::vector<Value *> &IVS,
-                                      __isl_take isl_id *IteratorID,
-                                      __isl_take isl_union_map *Schedule) {
-  isl_ast_expr *Expr = isl_ast_node_user_get_expr(User);
-  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
-  isl_id *Id = isl_ast_expr_get_id(StmtExpr);
-  isl_ast_expr_free(StmtExpr);
-  ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id);
-  std::vector<LoopToScevMapT> VLTS(IVS.size());
-
-  isl_union_set *Domain = isl_union_set_from_set(Stmt->getDomain().release());
-  Schedule = isl_union_map_intersect_domain(Schedule, Domain);
-  isl_map *S = isl_map_from_union_map(Schedule);
-
-  auto *NewAccesses = createNewAccesses(Stmt, User);
-  createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID);
-  VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses);
-  isl_id_to_ast_expr_free(NewAccesses);
-  isl_map_free(S);
-  isl_id_free(Id);
-  isl_ast_node_free(User);
-}
-
 void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
   auto *Id = isl_ast_node_mark_get_id(Node);
   auto Child = isl_ast_node_mark_get_node(Node);
@@ -422,13 +398,7 @@ void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
   // it will be optimized away and we should skip it.
   if (strcmp(isl_id_get_name(Id), "SIMD") == 0 &&
       isl_ast_node_get_type(Child) == isl_ast_node_for) {
-    bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
-    int VectorWidth =
-        getNumberOfIterations(isl::manage_copy(Child).as<isl::ast_node_for>());
-    if (Vector && 1 < VectorWidth && VectorWidth <= 16)
-      createForVector(Child, VectorWidth);
-    else
-      createForSequential(isl::manage(Child).as<isl::ast_node_for>(), true);
+    createForSequential(isl::manage(Child).as<isl::ast_node_for>(), true);
     isl_id_free(Id);
     return;
   }
@@ -456,67 +426,6 @@ void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
   isl_id_free(Id);
 }
 
-void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
-                                     int VectorWidth) {
-  isl_ast_node *Body = isl_ast_node_for_get_body(For);
-  isl_ast_expr *Init = isl_ast_node_for_get_init(For);
-  isl_ast_expr *Inc = isl_ast_node_for_get_inc(For);
-  isl_ast_expr *Iterator = isl_ast_node_for_get_iterator(For);
-  isl_id *IteratorID = isl_ast_expr_get_id(Iterator);
-
-  Value *ValueLB = ExprBuilder.create(Init);
-  Value *ValueInc = ExprBuilder.create(Inc);
-
-  Type *MaxType = ExprBuilder.getType(Iterator);
-  MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType());
-  MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType());
-
-  if (MaxType != ValueLB->getType())
-    ValueLB = Builder.CreateSExt(ValueLB, MaxType);
-  if (MaxType != ValueInc->getType())
-    ValueInc = Builder.CreateSExt(ValueInc, MaxType);
-
-  std::vector<Value *> IVS(VectorWidth);
-  IVS[0] = ValueLB;
-
-  for (int i = 1; i < VectorWidth; i++)
-    IVS[i] = Builder.CreateAdd(IVS[i - 1], ValueInc, "p_vector_iv");
-
-  isl::union_map Schedule = getScheduleForAstNode(isl::manage_copy(For));
-  assert(!Schedule.is_null() &&
-         "For statement annotation does not contain its schedule");
-
-  IDToValue[IteratorID] = ValueLB;
-
-  switch (isl_ast_node_get_type(Body)) {
-  case isl_ast_node_user:
-    createUserVector(Body, IVS, isl_id_copy(IteratorID), Schedule.copy());
-    break;
-  case isl_ast_node_block: {
-    isl_ast_node_list *List = isl_ast_node_block_get_children(Body);
-
-    for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i)
-      createUserVector(isl_ast_node_list_get_ast_node(List, i), IVS,
-                       isl_id_copy(IteratorID), Schedule.copy());
-
-    isl_ast_node_free(Body);
-    isl_ast_node_list_free(List);
-    break;
-  }
-  default:
-    isl_ast_node_dump(Body);
-    llvm_unreachable("Unhandled isl_ast_node in vectorizer");
-  }
-
-  IDToValue.erase(IDToValue.find(IteratorID));
-  isl_id_free(IteratorID);
-
-  isl_ast_node_free(For);
-  isl_ast_expr_free(Iterator);
-
-  VectorLoops++;
-}
-
 /// Restore the initial ordering of dimensions of the band node
 ///
 /// In case the band node represents all the dimensions of the iteration
@@ -761,46 +670,7 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
   ParallelLoops++;
 }
 
-/// Return whether any of @p Node's statements contain partial accesses.
-///
-/// Partial accesses are not supported by Polly's vector code generator.
-static bool hasPartialAccesses(__isl_take isl_ast_node *Node) {
-  return isl_ast_node_foreach_descendant_top_down(
-             Node,
-             [](isl_ast_node *Node, void *User) -> isl_bool {
-               if (isl_ast_node_get_type(Node) != isl_ast_node_user)
-                 return isl_bool_true;
-
-               isl::ast_expr Expr =
-                   isl::manage(isl_ast_node_user_get_expr(Node));
-               isl::ast_expr StmtExpr = Expr.get_op_arg(0);
-               isl::id Id = StmtExpr.get_id();
-
-               ScopStmt *Stmt =
-                   static_cast<ScopStmt *>(isl_id_get_user(Id.get()));
-               isl::set StmtDom = Stmt->getDomain();
-               for (auto *MA : *Stmt) {
-                 if (MA->isLatestPartialAccess())
-                   return isl_bool_error;
-               }
-               return isl_bool_true;
-             },
-             nullptr) == isl_stat_error;
-}
-
 void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) {
-  bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
-
-  if (Vector && IslAstInfo::isInnermostParallel(isl::manage_copy(For)) &&
-      !IslAstInfo::isReductionParallel(isl::manage_copy(For))) {
-    int VectorWidth =
-        getNumberOfIterations(isl::manage_copy(For).as<isl::ast_node_for>());
-    if (1 < VectorWidth && VectorWidth <= 16 && !hasPartialAccesses(For)) {
-      createForVector(For, VectorWidth);
-      return;
-    }
-  }
-
   if (IslAstInfo::isExecutedInParallel(isl::manage_copy(For))) {
     createForParallel(For);
     return;

diff  --git a/polly/lib/Support/RegisterPasses.cpp b/polly/lib/Support/RegisterPasses.cpp
index a00c962f295e2..b7db1ccfba362 100644
--- a/polly/lib/Support/RegisterPasses.cpp
+++ b/polly/lib/Support/RegisterPasses.cpp
@@ -103,7 +103,6 @@ static cl::opt<VectorizerChoice, true> Vectorizer(
     "polly-vectorizer", cl::desc("Select the vectorization strategy"),
     cl::values(
         clEnumValN(VECTORIZER_NONE, "none", "No Vectorization"),
-        clEnumValN(VECTORIZER_POLLY, "polly", "Polly internal vectorizer"),
         clEnumValN(
             VECTORIZER_STRIPMINE, "stripmine",
             "Strip-mine outer loops for the loop-vectorizer to trigger")),

diff  --git a/polly/test/CodeGen/20130211-getNumberOfIterations.ll b/polly/test/CodeGen/20130211-getNumberOfIterations.ll
deleted file mode 100644
index cef0dd559e957..0000000000000
--- a/polly/test/CodeGen/20130211-getNumberOfIterations.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen -polly-vectorizer=polly < %s
-
-; This test case checks that the polly vectorizer does not crash when
-; calculating the number of iterations.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-@b = external global [2048 x i64], align 16
-
-define void @foo(i64 %n) {
-entry:
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.body, %entry
-  %indvar = phi i64 [ 0, %entry ], [ %inc, %for.body ]
-  %cmp = icmp slt i64 %indvar, %n
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %arrayidx = getelementptr inbounds [2048 x i64], ptr @b, i64 0, i64 %indvar
-  store i64 1, ptr %arrayidx
-  %inc = add nsw i64 %indvar, 1
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  ret void
-}
-

diff  --git a/polly/test/CodeGen/MemAccess/simple_analyze.ll b/polly/test/CodeGen/MemAccess/simple_analyze.ll
index 00c90822d6565..143651b565aff 100644
--- a/polly/test/CodeGen/MemAccess/simple_analyze.ll
+++ b/polly/test/CodeGen/MemAccess/simple_analyze.ll
@@ -1,5 +1,4 @@
 ;RUN: opt %loadPolly -polly-print-import-jscop -polly-import-jscop-postfix=transformed -disable-output < %s | FileCheck %s
-;RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-codegen -polly-vectorizer=polly -S < %s | FileCheck %s --check-prefix=JSCOPVEC
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
 
 @A = common global [100 x i32] zeroinitializer, align 4
@@ -46,31 +45,3 @@ for.end14:                                        ; preds = %for.cond4
 }
 ; CHECK-DAG: New access function '{ Stmt_for_body7[i0] -> MemRef_B[0] }' detected in JSCOP file
 ; CHECK-DAG: New access function '{ Stmt_for_body[i0] -> MemRef_A[0] }' detected in JSCOP file
-
-; Verify that the new access function (see above) is actually used during vector code generation.
-
-; JSCOPVEC:  store i32 0, ptr @B
-; JSCOPVEC:  store i32 1, ptr @B
-; JSCOPVEC:  store i32 2, ptr @B
-; JSCOPVEC:  store i32 3, ptr @B
-; JSCOPVEC:  store i32 4, ptr @B
-; JSCOPVEC:  store i32 5, ptr @B
-; JSCOPVEC:  store i32 6, ptr @B
-; JSCOPVEC:  store i32 7, ptr @B
-; JSCOPVEC:  store i32 8, ptr @B
-; JSCOPVEC:  store i32 9, ptr @B
-; JSCOPVEC:  store i32 10, ptr @B
-; JSCOPVEC:  store i32 11, ptr @B
-
-; JSCOPVEC:  store i32 0, ptr @A
-; JSCOPVEC:  store i32 1, ptr @A
-; JSCOPVEC:  store i32 2, ptr @A
-; JSCOPVEC:  store i32 3, ptr @A
-; JSCOPVEC:  store i32 4, ptr @A
-; JSCOPVEC:  store i32 5, ptr @A
-; JSCOPVEC:  store i32 6, ptr @A
-; JSCOPVEC:  store i32 7, ptr @A
-; JSCOPVEC:  store i32 8, ptr @A
-; JSCOPVEC:  store i32 9, ptr @A
-; JSCOPVEC:  store i32 10, ptr @A
-; JSCOPVEC:  store i32 11, ptr @A

diff  --git a/polly/test/CodeGen/MemAccess/simple_stride_test.ll b/polly/test/CodeGen/MemAccess/simple_stride_test.ll
deleted file mode 100644
index 8ba72a59bc42c..0000000000000
--- a/polly/test/CodeGen/MemAccess/simple_stride_test.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-import-jscop -polly-codegen -polly-vectorizer=polly -S < %s | FileCheck %s
-;
-; Check that we use the correct __new__ strides:
-;    stride zero for B
-;    stride one for A
-;
-; CHECK:  %polly.access.B = getelementptr i32, i32* %B, i64 0
-; CHECK:  %[[BC:[._a-zA-Z0-9]*]] = bitcast i32* %polly.access.B to <1 x i32>*
-; CHECK:  %[[LD:[._a-zA-Z0-9]*]] = load <1 x i32>, <1 x i32>* %[[BC]], align 8
-; CHECK:  %[[SV:[._a-zA-Z0-9]*]] = shufflevector <1 x i32> %[[LD]], <1 x i32> %[[LD]], <16 x i32> zeroinitializer
-;
-; CHECK:  %polly.access.A = getelementptr i32, i32* %A, i64 0
-; CHECK:  %[[VP:[._a-zA-Z0-9]*]] = bitcast i32* %polly.access.A to <16 x i32>*
-; CHECK:  store <16 x i32> %[[SV]], <16 x i32>* %[[VP]], align 8
-;
-;    void simple_stride(int *restrict A, int *restrict B) {
-;      for (int i = 0; i < 16; i++)
-;        A[i * 2] = B[i * 2];
-;    }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @simple_stride(i32* noalias %A, i32* noalias %B) {
-entry:
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
-  %exitcond = icmp ne i64 %indvars.iv, 16
-  br i1 %exitcond, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %tmp = shl nsw i64 %indvars.iv, 1
-  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %tmp
-  %tmp4 = load i32, i32* %arrayidx, align 4
-  %tmp5 = shl nsw i64 %indvars.iv, 1
-  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %tmp5
-  store i32 %tmp4, i32* %arrayidx3, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  br label %for.cond
-
-for.end:                                          ; preds = %for.cond
-  ret void
-}

diff  --git a/polly/test/CodeGen/OpenMP/scev-rewriting.ll b/polly/test/CodeGen/OpenMP/scev-rewriting.ll
index 0f6ca904e2592..1b229fc19d259 100644
--- a/polly/test/CodeGen/OpenMP/scev-rewriting.ll
+++ b/polly/test/CodeGen/OpenMP/scev-rewriting.ll
@@ -1,4 +1,4 @@
-; RUN: opt %loadPolly < %s -polly-vectorizer=polly -polly-parallel -polly-parallel-force -polly-process-unprofitable -polly-codegen -S | FileCheck %s
+; RUN: opt %loadPolly < %s -polly-vectorizer=stripmine -polly-parallel -polly-parallel-force -polly-process-unprofitable -polly-codegen -S | FileCheck %s
 ; CHECK: define internal void @DoStringSort_polly_subfn
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnueabi"

diff  --git a/polly/test/CodeGen/getNumberOfIterations.ll b/polly/test/CodeGen/getNumberOfIterations.ll
deleted file mode 100644
index e59a076e2efcf..0000000000000
--- a/polly/test/CodeGen/getNumberOfIterations.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-vectorizer=polly -polly-codegen \
-; RUN:      < %s -S | FileCheck %s
-
-; #pragma known-parallel
-; for (int c0 = 0; c0 <= min(15, N - 1); c0 += 1)
-;   Stmt_if_then(c0);
-
-; CHECK: polly.stmt.if.then:                               ; preds = %polly.loop_header
-; CHECK:   %p_conv = sitofp i64 %polly.indvar to float
-; CHECK:   %scevgep = getelementptr float, float* %A, i64 %polly.indvar
-; CHECK:   %_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !3, !llvm.access.group !4
-; CHECK:   %p_add = fadd float %p_conv, %_p_scalar_
-; CHECK:   store float %p_add, float* %scevgep, align 4, !alias.scope !0, !noalias !3, !llvm.access.group !4
-
-define void @foo(float* %A, i64 %N) #0 {
-entry:
-  br label %for.body
-
-for.body:                                         ; preds = %entry, %for.inc
-  %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp1 = icmp slt i64 %i.02, %N
-  br i1 %cmp1, label %if.then, label %for.inc
-
-if.then:                                          ; preds = %for.body
-  %conv = sitofp i64 %i.02 to float
-  %arrayidx = getelementptr inbounds float, float* %A, i64 %i.02
-  %0 = load float, float* %arrayidx, align 4
-  %add = fadd float %conv, %0
-  store float %add, float* %arrayidx, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body, %if.then
-  %inc = add nuw nsw i64 %i.02, 1
-  %exitcond = icmp ne i64 %inc, 16
-  br i1 %exitcond, label %for.body, label %for.end
-
-for.end:                                          ; preds = %for.inc
-  ret void
-}

diff  --git a/polly/test/CodeGen/if-conditions-in-vector-code.ll b/polly/test/CodeGen/if-conditions-in-vector-code.ll
deleted file mode 100644
index 0b1897e48c492..0000000000000
--- a/polly/test/CodeGen/if-conditions-in-vector-code.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: opt %loadPolly -polly-vectorizer=polly -polly-print-ast -disable-output < %s | FileCheck %s -check-prefix=AST
-; RUN: opt %loadPolly -polly-vectorizer=polly -polly-codegen -S < %s | FileCheck %s
-;
-;    void foo(float *A) {
-;      for (long i = 0; i < 16; i++) {
-;        if (i % 2)
-;          A[i] += 2;
-;        if (i % 3)
-;          A[i] += 3;
-;      }
-;    }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; AST: #pragma simd
-; AST: #pragma known-parallel
-; AST: for (int c0 = 0; c0 <= 15; c0 += 1) {
-; AST:   if ((c0 + 1) % 2 == 0)
-; AST:     Stmt_bb4(c0);
-; AST:   if (c0 % 3 >= 1)
-; AST:     Stmt_bb11(c0);
-; AST: }
-
-; CHECK: polly.split_new_and_old
-
-define void @foo(ptr %A) {
-bb:
-  br label %bb1
-
-bb1:                                              ; preds = %bb16, %bb
-  %i.0 = phi i64 [ 0, %bb ], [ %tmp17, %bb16 ]
-  %exitcond = icmp ne i64 %i.0, 16
-  br i1 %exitcond, label %bb2, label %bb18
-
-bb2:                                              ; preds = %bb1
-  %tmp = srem i64 %i.0, 2
-  %tmp3 = icmp eq i64 %tmp, 0
-  br i1 %tmp3, label %bb8, label %bb4
-
-bb4:                                              ; preds = %bb2
-  %tmp5 = getelementptr inbounds float, ptr %A, i64 %i.0
-  %tmp6 = load float, ptr %tmp5, align 4
-  %tmp7 = fadd float %tmp6, 2.000000e+00
-  store float %tmp7, ptr %tmp5, align 4
-  br label %bb8
-
-bb8:                                              ; preds = %bb2, %bb4
-  %tmp9 = srem i64 %i.0, 3
-  %tmp10 = icmp eq i64 %tmp9, 0
-  br i1 %tmp10, label %bb15, label %bb11
-
-bb11:                                             ; preds = %bb8
-  %tmp12 = getelementptr inbounds float, ptr %A, i64 %i.0
-  %tmp13 = load float, ptr %tmp12, align 4
-  %tmp14 = fadd float %tmp13, 3.000000e+00
-  store float %tmp14, ptr %tmp12, align 4
-  br label %bb15
-
-bb15:                                             ; preds = %bb8, %bb11
-  br label %bb16
-
-bb16:                                             ; preds = %bb15
-  %tmp17 = add nsw i64 %i.0, 1
-  br label %bb1
-
-bb18:                                             ; preds = %bb1
-  ret void
-}

diff  --git a/polly/test/CodeGen/invariant_load_hoist_alignment.ll b/polly/test/CodeGen/invariant_load_hoist_alignment.ll
deleted file mode 100644
index 0ae07f7d6ae0c..0000000000000
--- a/polly/test/CodeGen/invariant_load_hoist_alignment.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -S \
-; RUN: -polly-invariant-load-hoisting=true < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-@A = common global [1024 x i32] zeroinitializer, align 16
-@B = common global [1024 x i32] zeroinitializer, align 16
-
-declare i32 @foo(i32) readnone
-
-define void @force_alignment() nounwind {
-;CHECK: @force_alignment
-entry:
-  br label %body
-
-body:
-  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
-  %scevgep = getelementptr [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvar
-; CHECK: [[T2:%.load]] = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @A, i32 0, i32 0), align 4
-; CHECK: %value_p.splatinsert = insertelement <4 x i32> poison, i32 [[T2]], i64 0
-  %value = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @A, i64 0, i64 0), align 4
-  %result = tail call i32 @foo(i32 %value) nounwind
-  store i32 %result, i32* %scevgep, align 4
-  %indvar_next = add i64 %indvar, 1
-  %exitcond = icmp eq i64 %indvar_next, 4
-  br i1 %exitcond, label %return, label %body
-
-return:
-  ret void
-}
-

diff  --git a/polly/test/CodeGen/partial_write_mapped_vector.ll b/polly/test/CodeGen/partial_write_mapped_vector.ll
deleted file mode 100644
index 27d3a74bd6ba5..0000000000000
--- a/polly/test/CodeGen/partial_write_mapped_vector.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt %loadPolly -basic-aa -polly-stmt-granularity=bb -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-vectorizer=polly -polly-opt-isl -polly-ast -polly-codegen -S < %s | FileCheck %s
-;
-; Polly's vectorizer does not support partial accesses.
-;
-; for (int j = 0; j < 4; j += 1) {
-;body:
-;   val = 21.0 + 21.0;
-;   if (j > 1)
-;user:
-;     A[0] = val;
-; }
-
-define void @partial_write_mapped_vector(ptr noalias nonnull %A) {
-entry:
-  br label %for
-
-for:
-  %j = phi i32 [0, %entry], [%j.inc, %inc]
-  %j.cmp = icmp slt i32 %j, 4
-  br i1 %j.cmp, label %body, label %exit
-
-    body:
-      %val = fadd double 21.0, 21.0
-      %if.cond = icmp sgt i32 %j, 1
-      br i1 %if.cond, label %user, label %inc
-
-    user:
-      %elt= getelementptr inbounds double, ptr %A, i32 %j
-      store double %val, ptr %elt
-      br label %inc
-
-inc:
-  %j.inc = add nuw nsw i32 %j, 1
-  br label %for
-
-exit:
-  br label %return
-
-return:
-  ret void
-}
-
-
-; CHECK-LABEL: polly.stmt.body:
-; CHECK-NEXT:    %p_val = fadd double 2.100000e+01, 2.100000e+01
-; CHECK-NEXT:    %0 = trunc i64 %polly.indvar to i32
-; CHECK-NEXT:    %p_if.cond = icmp sgt i32 %0, 1
-; CHECK-NEXT:    %1 = icmp sge i64 %polly.indvar, 2
-; CHECK-NEXT:    %polly.Stmt_body_Write0.cond = icmp ne i1 %1, false
-; CHECK-NEXT:    br i1 %polly.Stmt_body_Write0.cond, label %polly.stmt.body.Stmt_body_Write0.partial, label %polly.stmt.body.cont
-
-; CHECK-LABEL:  polly.stmt.body.Stmt_body_Write0.partial:
-; CHECK-NEXT:    %polly.access.A = getelementptr double, ptr %A, i64 1
-; CHECK-NEXT:    store double %p_val, ptr %polly.access.A
-; CHECK-NEXT:    br label %polly.stmt.body.cont
-
-; CHECK-LABEL:  polly.stmt.body.cont:

diff  --git a/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop b/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop
deleted file mode 100644
index 2aadaf12d59ad..0000000000000
--- a/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-   "arrays" : [
-      {
-         "name" : "MemRef_A",
-         "sizes" : [ "*" ],
-         "type" : "double"
-      }
-   ],
-   "context" : "{  :  }",
-   "name" : "%for---%return",
-   "statements" : [
-      {
-         "accesses" : [
-            {
-               "kind" : "write",
-               "relation" : "{ Stmt_body[i0] -> MemRef_val[] }"
-            }
-         ],
-         "domain" : "{ Stmt_body[i0] : 0 <= i0 <= 3 }",
-         "name" : "Stmt_body",
-         "schedule" : "{ Stmt_body[i0] -> [i0, 0] }"
-      },
-      {
-         "accesses" : [
-            {
-               "kind" : "write",
-               "relation" : "{ Stmt_user[i0] -> MemRef_A[i0] }"
-            },
-            {
-               "kind" : "read",
-               "relation" : "{ Stmt_user[i0] -> MemRef_val[] }"
-            }
-         ],
-         "domain" : "{ Stmt_user[i0] : 2 <= i0 <= 3 }",
-         "name" : "Stmt_user",
-         "schedule" : "{ Stmt_user[i0] -> [i0, 1] }"
-      }
-   ]
-}

diff  --git a/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop.transformed b/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop.transformed
deleted file mode 100644
index 39f97f0c292ad..0000000000000
--- a/polly/test/CodeGen/partial_write_mapped_vector___%for---%return.jscop.transformed
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-   "arrays" : [
-      {
-         "name" : "MemRef_A",
-         "sizes" : [ "*" ],
-         "type" : "double"
-      }
-   ],
-   "context" : "{  :  }",
-   "name" : "%for---%return",
-   "statements" : [
-      {
-         "accesses" : [
-            {
-               "kind" : "write",
-               "relation" : "{ Stmt_body[j] -> MemRef_A[1] : j > 1 }"
-            }
-         ],
-         "domain" : "{ Stmt_body[i0] : 0 <= i0 <= 3 }",
-         "name" : "Stmt_body",
-         "schedule" : "{ Stmt_body[i0] -> [i0, 0] }"
-      },
-      {
-         "accesses" : [
-            {
-               "kind" : "write",
-               "relation" : "{ Stmt_user[i0] -> MemRef_A[i0] }"
-            },
-            {
-               "kind" : "read",
-               "relation" : "{ Stmt_user[j] -> MemRef_A[1] }"
-            }
-         ],
-         "domain" : "{ Stmt_user[i0] : 2 <= i0 <= 3 }",
-         "name" : "Stmt_user",
-         "schedule" : "{ Stmt_user[i0] -> [i0, 1] }"
-      }
-   ]
-}

diff  --git a/polly/test/CodeGen/simple_vec_assign_scalar.ll b/polly/test/CodeGen/simple_vec_assign_scalar.ll
deleted file mode 100644
index ae8a75257ac66..0000000000000
--- a/polly/test/CodeGen/simple_vec_assign_scalar.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: opt %loadPolly -basic-aa -polly-codegen \
-; RUN: -polly-vectorizer=polly -dce -S < %s | FileCheck %s
-
-; RUN: opt %loadPolly -basic-aa -polly-codegen -polly-vectorizer=stripmine -dce -S < %s | FileCheck %s --check-prefix=STRIPMINE
-
-;#define N 1024
-;float A[N];
-;float B[N];
-;
-;void simple_vec_const(void) {
-;  int i;
-;
-;  for (i = 0; i < 4; i++)
-;    B[i] = A[i] + 1;
-;}
-;int main()
-;{
-;  simple_vec_const();
-;  return A[42];
-;}
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x float] zeroinitializer, align 16
-
-define void @simple_vec_const() nounwind {
-bb:
-  br label %bb2
-
-bb2:                                              ; preds = %bb5, %bb
-  %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ]
-  %scevgep = getelementptr [1024 x float], ptr @B, i64 0, i64 %indvar
-  %scevgep1 = getelementptr [1024 x float], ptr @A, i64 0, i64 %indvar
-  %exitcond = icmp ne i64 %indvar, 4
-  br i1 %exitcond, label %bb3, label %bb6
-
-bb3:                                              ; preds = %bb2
-  %tmp = load float, ptr %scevgep1, align 4
-  %tmp4 = fadd float %tmp, 1.000000e+00
-  store float %tmp4, ptr %scevgep, align 4
-  br label %bb5
-
-bb5:                                              ; preds = %bb3
-  %indvar.next = add i64 %indvar, 1
-  br label %bb2
-
-bb6:                                              ; preds = %bb2
-  ret void
-}
-
-define i32 @main() nounwind {
-bb:
-  call void @simple_vec_const()
-  %tmp = load float, ptr getelementptr inbounds ([1024 x float], ptr @A, i64 0, i64 42), align 8
-  %tmp1 = fptosi float %tmp to i32
-  ret i32 %tmp1
-}
-
-; STRIPMINE-NOT: <4 x float>
-
-; CHECK: %tmp_p_vec_full = load <4 x float>, ptr @A, align 8, !alias.scope !0, !noalias !3
-; CHECK: %tmp4p_vec = fadd <4 x float> %tmp_p_vec_full, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK: store <4 x float> %tmp4p_vec, ptr @B

diff  --git a/polly/test/CodeGen/simple_vec_assign_scalar_2.ll b/polly/test/CodeGen/simple_vec_assign_scalar_2.ll
deleted file mode 100644
index f26720f95df3a..0000000000000
--- a/polly/test/CodeGen/simple_vec_assign_scalar_2.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; RUN: opt %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -dce -S < %s | FileCheck %s
-
-;#define N 1024
-;float A[N];
-;float B[N];
-;
-;void simple_vec_const(void) {
-;  int i;
-;
-;  for (i = 0; i < 4; i++)
-;    B[i] = A[i] + i;
-;}
-;int main()
-;{
-;  simple_vec_const();
-;  return A[42];
-;}
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x float] zeroinitializer, align 16
-
-define void @simple_vec_const() nounwind {
-bb:
-  br label %bb2
-
-bb2:                                              ; preds = %bb6, %bb
-  %indvar = phi i64 [ %indvar.next, %bb6 ], [ 0, %bb ]
-  %scevgep = getelementptr [1024 x float], ptr @B, i64 0, i64 %indvar
-  %i.0 = trunc i64 %indvar to i32
-  %scevgep1 = getelementptr [1024 x float], ptr @A, i64 0, i64 %indvar
-  %exitcond = icmp ne i64 %indvar, 4
-  br i1 %exitcond, label %bb3, label %bb7
-
-bb3:                                              ; preds = %bb2
-  %tmp = load float, ptr %scevgep1, align 4
-  %tmp4 = sitofp i32 %i.0 to float
-  %tmp5 = fadd float %tmp, %tmp4
-  store float %tmp5, ptr %scevgep, align 4
-  br label %bb6
-
-bb6:                                              ; preds = %bb3
-  %indvar.next = add i64 %indvar, 1
-  br label %bb2
-
-bb7:                                              ; preds = %bb2
-  ret void
-}
-
-define i32 @main() nounwind {
-bb:
-  call void @simple_vec_const()
-  %tmp = load float, ptr getelementptr inbounds ([1024 x float], ptr @A, i64 0, i64 42), align 8
-  %tmp1 = fptosi float %tmp to i32
-  ret i32 %tmp1
-}
-
-
-; CHECK: insertelement <4 x float> undef, float %{{[^,]+}}, i32 0
-; CHECK: insertelement <4 x float> %0, float %{{[^,]+}}, i32 1
-; CHECK: insertelement <4 x float> %1, float %{{[^,]+}}, i32 2
-; CHECK: insertelement <4 x float> %2, float %{{[^,]+}}, i32 3
-; CHECK: fadd <4 x float> %tmp_p_vec_full, %3
-

diff  --git a/polly/test/CodeGen/simple_vec_call.ll b/polly/test/CodeGen/simple_vec_call.ll
deleted file mode 100644
index e5f2c129f135c..0000000000000
--- a/polly/test/CodeGen/simple_vec_call.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -S \
-; RUN: -polly-invariant-load-hoisting=true < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x float] zeroinitializer, align 16
-
-declare float @foo(float) readnone
-
-define void @simple_vec_call() nounwind {
-entry:
-  br label %body
-
-body:
-  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
-  %scevgep = getelementptr [1024 x float], [1024 x float]* @B, i64 0, i64 %indvar
-  %value = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16
-  %result = tail call float @foo(float %value) nounwind
-  store float %result, float* %scevgep, align 4
-  %indvar_next = add i64 %indvar, 1
-  %exitcond = icmp eq i64 %indvar_next, 4
-  br i1 %exitcond, label %return, label %body
-
-return:
-  ret void
-}
-
-; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW:#[0-9]+]]
-; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]]
-; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]]
-; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]]
-; CHECK: [[RES5:%[a-zA-Z0-9_]+]] = insertelement <4 x float> undef, float [[RES1]], i32 0
-; CHECK: [[RES6:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[RES5]], float [[RES2]], i32 1
-; CHECK: [[RES7:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[RES6]], float [[RES3]], i32 2
-; CHECK: [[RES8:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[RES7]], float [[RES4]], i32 3
-; CHECK:  store <4 x float> [[RES8]]
-; CHECK: attributes [[NUW]] = { nounwind }

diff  --git a/polly/test/CodeGen/simple_vec_call_2.ll b/polly/test/CodeGen/simple_vec_call_2.ll
deleted file mode 100644
index b3767057dd65e..0000000000000
--- a/polly/test/CodeGen/simple_vec_call_2.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -dce \
-; RUN: -polly-invariant-load-hoisting=true -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x float**] zeroinitializer, align 16
-
-declare float** @foo(float) readnone
-
-define void @simple_vec_call() nounwind {
-entry:
-  br label %body
-
-body:
-  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
-  %scevgep = getelementptr [1024 x float**], [1024 x float**]* @B, i64 0, i64 %indvar
-  %value = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16
-  %result = tail call float** @foo(float %value) nounwind
-  store float** %result, float*** %scevgep, align 4
-  %indvar_next = add i64 %indvar, 1
-  %exitcond = icmp eq i64 %indvar_next, 4
-  br i1 %exitcond, label %return, label %body
-
-return:
-  ret void
-}
-
-; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW:#[0-9]+]]
-; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]]
-; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]]
-; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]]
-; CHECK: %0 = insertelement <4 x float**> undef, float** %p_result, i32 0
-; CHECK: %1 = insertelement <4 x float**> %0, float** %p_result1, i32 1
-; CHECK: %2 = insertelement <4 x float**> %1, float** %p_result2, i32 2
-; CHECK: %3 = insertelement <4 x float**> %2, float** %p_result3, i32 3
-; CHECK: store <4 x float**> %3, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align
-; CHECK: attributes [[NUW]] = { nounwind }

diff  --git a/polly/test/CodeGen/simple_vec_cast.ll b/polly/test/CodeGen/simple_vec_cast.ll
deleted file mode 100644
index 7578663754ed7..0000000000000
--- a/polly/test/CodeGen/simple_vec_cast.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly \
-; RUN: -polly-invariant-load-hoisting=true -dce -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x double] zeroinitializer, align 16
-
-define void @simple_vec_const() nounwind {
-bb:
-  br label %bb1
-
-bb1:                                              ; preds = %bb3, %bb
-  %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ]
-  %scevgep = getelementptr [1024 x double], [1024 x double]* @B, i64 0, i64 %indvar
-  %exitcond = icmp ne i64 %indvar, 4
-  br i1 %exitcond, label %bb2, label %bb4
-
-bb2:                                              ; preds = %bb1
-  %tmp = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16
-  %tmp2 = fpext float %tmp to double
-  store double %tmp2, double* %scevgep, align 4
-  br label %bb3
-
-bb3:                                              ; preds = %bb2
-  %indvar.next = add i64 %indvar, 1
-  br label %bb1
-
-bb4:                                              ; preds = %bb1
-  ret void
-}
-
-; CHECK:   %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0)
-
-; CHECK: polly.stmt.bb2:                                   ; preds = %polly.start
-; CHECK:   %tmp_p.splatinsert = insertelement <4 x float> poison, float %.load, i64 0
-; CHECK:   %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
-; CHECK:   %0 = fpext <4 x float> %tmp_p.splat to <4 x double>
-; CHECK:   store <4 x double> %0, <4 x double>*

diff  --git a/polly/test/CodeGen/simple_vec_const.ll b/polly/test/CodeGen/simple_vec_const.ll
deleted file mode 100644
index cb5edb2bcbd0f..0000000000000
--- a/polly/test/CodeGen/simple_vec_const.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -S \
-; RUN: -polly-invariant-load-hoisting=true < %s | FileCheck %s
-
-;#define N 1024
-;float A[N];
-;float B[N];
-;
-;void simple_vec_const(void) {
-;  int i;
-;
-;  for (i = 0; i < 4; i++)
-;    B[i] = A[0];
-;}
-;int main()
-;{
-;  simple_vec_const();
-;  return A[42];
-;}
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x float] zeroinitializer, align 16
-
-define void @simple_vec_const() nounwind {
-; <label>:0
-  br label %1
-
-; <label>:1                                       ; preds = %4, %0
-  %indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ]
-  %scevgep = getelementptr [1024 x float], [1024 x float]* @B, i64 0, i64 %indvar
-  %exitcond = icmp ne i64 %indvar, 4
-  br i1 %exitcond, label %2, label %5
-
-; <label>:2                                       ; preds = %1
-  %3 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 0), align 16
-  store float %3, float* %scevgep, align 4
-  br label %4
-
-; <label>:4                                       ; preds = %2
-  %indvar.next = add i64 %indvar, 1
-  br label %1
-
-; <label>:5                                       ; preds = %1
-  ret void
-}
-
-define i32 @main() nounwind {
-  call void @simple_vec_const()
-  %1 = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i64 0, i64 42), align 8
-  %2 = fptosi float %1 to i32
-  ret i32 %2
-}
-
-
-; CHECK:   %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0)
-
-; CHECK: polly.stmt.:                                      ; preds = %polly.start
-; CHECK:   %_p.splatinsert = insertelement <4 x float> poison, float %.load, i64 0
-; CHECK:   %_p.splat = shufflevector <4 x float> %_p.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer

diff  --git a/polly/test/CodeGen/simple_vec_large_width.ll b/polly/test/CodeGen/simple_vec_large_width.ll
deleted file mode 100644
index 4ad3b89876165..0000000000000
--- a/polly/test/CodeGen/simple_vec_large_width.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -dce -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x float] zeroinitializer, align 16
-
-define void @simple_vec_large_width() nounwind {
-; <label>:0
-  br label %1
-
-; <label>:1                                       ; preds = %4, %0
-  %indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ]
-  %scevgep = getelementptr [1024 x float], ptr @B, i64 0, i64 %indvar
-  %scevgep1 = getelementptr [1024 x float], ptr @A, i64 0, i64 %indvar
-  %exitcond = icmp ne i64 %indvar, 15
-  br i1 %exitcond, label %2, label %5
-
-; <label>:2                                       ; preds = %1
-  %3 = load float, ptr %scevgep1, align 4
-  store float %3, ptr %scevgep, align 4
-  br label %4
-
-; <label>:4                                       ; preds = %2
-  %indvar.next = add i64 %indvar, 1
-  br label %1
-
-; <label>:5                                       ; preds = %1
-  ret void
-}
-
-define i32 @main() nounwind {
-  call void @simple_vec_large_width()
-  %1 = load float, ptr getelementptr inbounds ([1024 x float], ptr @A, i64 0, i64 42), align 8
-  %2 = fptosi float %1 to i32
-  ret i32 %2
-}
-
-; CHECK: [[VEC1:%[a-zA-Z0-9_]+_full]] = load <15 x float>, ptr
-; CHECK: store <15 x float> [[VEC1]]

diff  --git a/polly/test/CodeGen/simple_vec_ptr_ptr_ty.ll b/polly/test/CodeGen/simple_vec_ptr_ptr_ty.ll
deleted file mode 100644
index 8e4b8bce6f8ff..0000000000000
--- a/polly/test/CodeGen/simple_vec_ptr_ptr_ty.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -S \
-; RUN: -polly-invariant-load-hoisting=true < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float**] zeroinitializer, align 16
-@B = common global [1024 x float**] zeroinitializer, align 16
-
-declare float @foo(float) readnone
-
-define void @simple_vec_call() nounwind {
-entry:
-  br label %body
-
-body:
-  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
-  %scevgep = getelementptr [1024 x float**], [1024 x float**]* @B, i64 0, i64 %indvar
-  %value = load float**, float*** getelementptr inbounds ([1024 x float**], [1024 x float**]* @A, i64 0, i64 0), align 16
-  store float** %value, float*** %scevgep, align 4
-  %indvar_next = add i64 %indvar, 1
-  %exitcond = icmp eq i64 %indvar_next, 4
-  br i1 %exitcond, label %return, label %body
-
-return:
-  ret void
-}
-; CHECK:   %.load = load float**, float*** getelementptr inbounds ([1024 x float**], [1024 x float**]* @A, i32 0, i32 0)
-
-; CHECK-NOT: load <1 x float**>
-; CHECK: %value_p.splatinsert = insertelement <4 x float**> poison, float** %.load, i64 0
-; CHECK: %value_p.splat = shufflevector <4 x float**> %value_p.splatinsert, <4 x float**> poison, <4 x i32> zeroinitializer
-; CHECK: store <4 x float**> %value_p.splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8

diff  --git a/polly/test/CodeGen/simple_vec_stride_negative_one.ll b/polly/test/CodeGen/simple_vec_stride_negative_one.ll
deleted file mode 100644
index 79ecf657d8902..0000000000000
--- a/polly/test/CodeGen/simple_vec_stride_negative_one.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen -polly-vectorizer=polly -S < %s | FileCheck %s
-
-; ModuleID = 'reverse.c'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-;int A[100];
-;void foo() {
-;  for (int i=3; i >= 0; i--)
-;    A[i]+=1;
-;}
-
-
-@A = common global [100 x i32] zeroinitializer, align 16
-
-; Function Attrs: nounwind uwtable
-define void @foo() #0 {
-entry:
-  br label %for.body
-
-for.body:                                         ; preds = %entry, %for.body
-  %indvars.iv = phi i64 [ 3, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [100 x i32], ptr @A, i64 0, i64 %indvars.iv
-  %0 = load i32, ptr %arrayidx, align 4
-  %add = add nsw i32 %0, 1
-  store i32 %add, ptr %arrayidx, align 4
-  %indvars.iv.next = add nsw i64 %indvars.iv, -1
-  %1 = trunc i64 %indvars.iv to i32
-  %cmp = icmp sgt i32 %1, 0
-  br i1 %cmp, label %for.body, label %for.end
-
-for.end:                                          ; preds = %for.body
-  ret void
-}
-
-; CHECK: @foo
-; CHECK: [[LOAD:%[a-zA-Z0-9_]+]] = load <4 x i32>, ptr
-; CHECK: [[REVERSE_LOAD:%[a-zA-Z0-9_]+reverse]] = shufflevector <4 x i32> [[LOAD]], <4 x i32> [[LOAD]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>

diff  --git a/polly/test/CodeGen/simple_vec_stride_one.ll b/polly/test/CodeGen/simple_vec_stride_one.ll
deleted file mode 100644
index 8250879283b19..0000000000000
--- a/polly/test/CodeGen/simple_vec_stride_one.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-codegen -polly-vectorizer=polly \
-; RUN:                 < %s -S | FileCheck %s
-
-; CHECK: store <4 x double> %val.s2a_p_splat, <4 x double>* %vector_ptr
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @update_access_functions(i64 %arg, double* %A, double* %B) {
-bb3:
-  br label %loop1
-
-loop1:
-  %indvar = phi i64 [ %indvar.next, %loop1 ], [ 0, %bb3 ]
-  %ptr1 = getelementptr inbounds double, double* %A, i64 %indvar
-  store double 42.0, double* %ptr1, align 8
-  %indvar.next = add nuw nsw i64 %indvar, 1
-  %cmp = icmp ne i64 %indvar.next, 4
-  br i1 %cmp, label %loop1, label %loop2
-
-loop2:
-  %indvar.2 = phi i64 [ %indvar.2.next, %loop2 ], [ 0, %loop1 ]
-  %ptr2 = getelementptr inbounds double, double* %A, i64 %indvar.2
-  %val = load double, double* %ptr2, align 8
-  %indvar.2.next = add nuw nsw i64 %indvar.2, 1
-  %cmp.2 = icmp ne i64 %indvar.2.next, 4
-  br i1 %cmp.2, label %loop2, label %loop3
-
-loop3:
-  %indvar.3 = phi i64 [ %indvar.3.next, %loop3 ], [ 0, %loop2 ]
-  %ptr3 = getelementptr inbounds double, double* %A, i64 %indvar.3
-  store double %val, double* %ptr3, align 8
-  %indvar.3.next = add nuw nsw i64 %indvar.3, 1
-  %cmp.3 = icmp ne i64 %indvar.3.next, 4
-  br i1 %cmp.3, label %loop3, label %exit
-
-exit:
-  ret void
-}

diff  --git a/polly/test/CodeGen/simple_vec_stride_x.ll b/polly/test/CodeGen/simple_vec_stride_x.ll
deleted file mode 100644
index 20a8e8a3bbcc4..0000000000000
--- a/polly/test/CodeGen/simple_vec_stride_x.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: opt %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly  -dce -S < %s | FileCheck %s
-
-;#define N 1024
-;float A[N];
-;float B[N];
-;
-;void simple_vec_stride_x(void) {
-;  int i;
-;
-;  for (i = 0; i < 4; i++)
-;    B[2 * i] = A[2 * i];
-;}
-;int main()
-;{
-;  simple_vec_stride_x();
-;  return A[42];
-;}
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x float] zeroinitializer, align 16
-
-define void @simple_vec_stride_x() nounwind {
-bb:
-  br label %bb2
-
-bb2:                                              ; preds = %bb5, %bb
-  %indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ]
-  %tmp = mul i64 %indvar, 2
-  %scevgep = getelementptr [1024 x float], ptr @B, i64 0, i64 %tmp
-  %scevgep1 = getelementptr [1024 x float], ptr @A, i64 0, i64 %tmp
-  %exitcond = icmp ne i64 %indvar, 4
-  br i1 %exitcond, label %bb3, label %bb6
-
-bb3:                                              ; preds = %bb2
-  %tmp4 = load float, ptr %scevgep1, align 8
-  store float %tmp4, ptr %scevgep, align 8
-  br label %bb5
-
-bb5:                                              ; preds = %bb3
-  %indvar.next = add i64 %indvar, 1
-  br label %bb2
-
-bb6:                                              ; preds = %bb2
-  ret void
-}
-
-define i32 @main() nounwind {
-bb:
-  call void @simple_vec_stride_x()
-  %tmp = load float, ptr getelementptr inbounds ([1024 x float], ptr @A, i64 0, i64 42), align 8
-  %tmp1 = fptosi float %tmp to i32
-  ret i32 %tmp1
-}
-
-; CHECK: [[LOAD1:%[a-zA-Z0-9_]+_scalar_]] = load float, ptr
-; CHECK: [[VEC1:%[a-zA-Z0-9_]+]] = insertelement <4 x float> undef, float [[LOAD1]], i32 0
-; CHECK: [[LOAD2:%[a-zA-Z0-9_]+]] = load float, ptr
-; CHECK: [[VEC2:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC1]], float [[LOAD2]], i32 1
-; CHECK: [[LOAD3:%[a-zA-Z0-9_]+]] = load float, ptr
-; CHECK: [[VEC3:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC2]], float [[LOAD3]], i32 2
-; CHECK: [[LOAD4:%[a-zA-Z0-9_]+]] = load float, ptr
-; CHECK: [[VEC4:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC3]], float [[LOAD4]], i32 3
-; CHECK: [[EL1:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 0
-; CHECK: store float [[EL1]]
-; CHECK: [[EL2:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 1
-; CHECK: store float [[EL2]]
-; CHECK: [[EL3:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 2
-; CHECK: store float [[EL3]]
-; CHECK: [[EL4:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 3
-; CHECK: store float [[EL4]]

diff  --git a/polly/test/CodeGen/simple_vec_strides_multidim.ll b/polly/test/CodeGen/simple_vec_strides_multidim.ll
deleted file mode 100644
index 98837af2fbc82..0000000000000
--- a/polly/test/CodeGen/simple_vec_strides_multidim.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; RUN: opt %loadPolly -polly-opt-isl -polly-codegen -polly-vectorizer=polly -polly-prevect-width=8 -S -dce < %s | FileCheck %s
-;
-;    void foo(long n, float A[restrict][n], float B[restrict][n],
-;             float C[restrict][n], float D[restrict][n]) {
-;      for (long i = 0; i < 8; i++)
-;        for (long j = 0; j < 8; j++)
-;          A[i][j] += B[i][0] + C[i][2 * j] + D[j][0];
-;    }
-;
-
-; CHECK: shufflevector
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: insertelement
-; CHECK: store <8 x float>
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo(i64 %n, ptr noalias %A, ptr noalias %B, ptr noalias %C, ptr noalias %D) {
-bb:
-  br label %bb3
-
-bb3:                                              ; preds = %bb25, %bb
-  %i.0 = phi i64 [ 0, %bb ], [ %tmp26, %bb25 ]
-  %exitcond2 = icmp ne i64 %i.0, 8
-  br i1 %exitcond2, label %bb4, label %bb27
-
-bb4:                                              ; preds = %bb3
-  br label %bb5
-
-bb5:                                              ; preds = %bb22, %bb4
-  %j.0 = phi i64 [ 0, %bb4 ], [ %tmp23, %bb22 ]
-  %exitcond = icmp ne i64 %j.0, 8
-  br i1 %exitcond, label %bb6, label %bb24
-
-bb6:                                              ; preds = %bb5
-  %tmp = mul nsw i64 %i.0, %n
-  %tmp7 = getelementptr inbounds float, ptr %B, i64 %tmp
-  %tmp8 = load float, ptr %tmp7, align 4
-  %tmp9 = shl nsw i64 %j.0, 1
-  %tmp10 = mul nsw i64 %i.0, %n
-  %.sum = add i64 %tmp10, %tmp9
-  %tmp11 = getelementptr inbounds float, ptr %C, i64 %.sum
-  %tmp12 = load float, ptr %tmp11, align 4
-  %tmp13 = fadd float %tmp8, %tmp12
-  %tmp14 = mul nsw i64 %j.0, %n
-  %tmp15 = getelementptr inbounds float, ptr %D, i64 %tmp14
-  %tmp16 = load float, ptr %tmp15, align 4
-  %tmp17 = fadd float %tmp13, %tmp16
-  %tmp18 = mul nsw i64 %i.0, %n
-  %.sum1 = add i64 %tmp18, %j.0
-  %tmp19 = getelementptr inbounds float, ptr %A, i64 %.sum1
-  %tmp20 = load float, ptr %tmp19, align 4
-  %tmp21 = fadd float %tmp20, %tmp17
-  store float %tmp21, ptr %tmp19, align 4
-  br label %bb22
-
-bb22:                                             ; preds = %bb6
-  %tmp23 = add nsw i64 %j.0, 1
-  br label %bb5
-
-bb24:                                             ; preds = %bb5
-  br label %bb25
-
-bb25:                                             ; preds = %bb24
-  %tmp26 = add nsw i64 %i.0, 1
-  br label %bb3
-
-bb27:                                             ; preds = %bb3
-  ret void
-}

diff  --git a/polly/test/CodeGen/simple_vec_two_stmts.ll b/polly/test/CodeGen/simple_vec_two_stmts.ll
deleted file mode 100644
index 14c12c4887b53..0000000000000
--- a/polly/test/CodeGen/simple_vec_two_stmts.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: opt %loadPolly -basic-aa -polly-codegen -polly-vectorizer=polly -dce -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 16
-@B = common global [1024 x float] zeroinitializer, align 16
-@C = common global [1024 x float] zeroinitializer, align 16
-
-define void @simple_vec_stride_one() nounwind {
-bb0:
-  br label %bb1
-
-bb1:
-  %indvar = phi i64 [ %indvar.next, %bb4 ], [ 0, %bb0 ]
-  %scevgep = getelementptr [1024 x float], ptr @B, i64 0, i64 %indvar
-  %scevgep2 = getelementptr [1024 x float], ptr @C, i64 0, i64 %indvar
-  %scevgep1 = getelementptr [1024 x float], ptr @A, i64 0, i64 %indvar
-  %exitcond = icmp ne i64 %indvar, 4
-  br i1 %exitcond, label %bb2a, label %bb5
-
-bb2a:
-  %tmp1 = load float, ptr %scevgep1, align 4
-  store float %tmp1, ptr %scevgep, align 4
-  br label %bb2b
-
-bb2b:
-  %tmp2 = load float, ptr %scevgep1, align 4
-  store float %tmp2, ptr %scevgep2, align 4
-  br label %bb4
-
-bb4:
-  %indvar.next = add i64 %indvar, 1
-  br label %bb1
-
-bb5:
-  ret void
-}
-
-define i32 @main() nounwind {
-  call void @simple_vec_stride_one()
-  %1 = load float, ptr getelementptr inbounds ([1024 x float], ptr @A, i64 0, i64 42), align 8
-  %2 = fptosi float %1 to i32
-  ret i32 %2
-}
-
-; CHECK: [[LOAD1:%[a-zA-Z0-9_]+_full]] = load <4 x float>, ptr
-; CHECK: store <4 x float> [[LOAD1]]
-; CHECK: [[LOAD2:%[a-zA-Z0-9_]+_full]] = load <4 x float>, ptr
-; CHECK: store <4 x float> [[LOAD2]]
-

diff  --git a/polly/test/CodeGen/stride_detection.ll b/polly/test/CodeGen/stride_detection.ll
deleted file mode 100644
index 3f9044baf599d..0000000000000
--- a/polly/test/CodeGen/stride_detection.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: opt -opaque-pointers=0 %loadPolly -polly-opt-isl -polly-vectorizer=polly -polly-codegen < %s -S | FileCheck %s
-
-; #pragma known-parallel
-;   for (int c0 = 0; c0 <= 31; c0 += 1)
-;     for (int c1 = 0; c1 <= floord(nk - 1, 32); c1 += 1)
-;       for (int c2 = 0; c2 <= 7; c2 += 1)
-;         for (int c3 = 0; c3 <= min(31, nk - 32 * c1 - 1); c3 += 1)
-;           #pragma simd
-;           for (int c4 = 0; c4 <= 3; c4 += 1)
-;             Stmt_for_body_3(32 * c0 + 4 * c2 + c4, 32 * c1 + c3);
-
-; CHECK: polly.stmt.for.body.3:                            ; preds = %polly.loop_header18
-; CHECK:   %_p_splat_one = load <1 x double>, <1 x double>* %_p_vec_p, align 8, !alias.scope !3, !noalias !6, !llvm.access.group !2
-; CHECK:   %_p_vec_full = load <4 x double>, <4 x double>* %vector_ptr, align 8, !alias.scope !6, !noalias !3, !llvm.access.group !2
-; CHECK:   extractelement <4 x double> %addp_vec, i32 0
-; CHECK:   extractelement <4 x double> %addp_vec, i32 1
-; CHECK:   extractelement <4 x double> %addp_vec, i32 2
-; CHECK:   extractelement <4 x double> %addp_vec, i32 3
-; CHECK:   store <4 x double> %addp_vec, <4 x double>* {{.*}}, align 8, !alias.scope !6, !noalias !3, !llvm.access.group !2
-
-define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, [1024 x double]* %C, [1024 x double]* %A) #0 {
-entry:
-  br label %for.cond.1.preheader
-
-for.cond.1.preheader:                             ; preds = %entry, %for.inc.10
-  %indvars.iv16 = phi i64 [ 0, %entry ], [ %indvars.iv.next17, %for.inc.10 ]
-  %cmp2.13 = icmp sgt i32 %nk, 0
-  br i1 %cmp2.13, label %for.body.3.lr.ph, label %for.inc.10
-
-for.body.3.lr.ph:                                 ; preds = %for.cond.1.preheader
-  br label %for.body.3
-
-for.body.3:                                       ; preds = %for.body.3.lr.ph, %for.body.3
-  %indvars.iv = phi i64 [ 0, %for.body.3.lr.ph ], [ %indvars.iv.next, %for.body.3 ]
-  %arrayidx5 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 0, i64 %indvars.iv
-  %0 = load double, double* %arrayidx5, align 8
-  %arrayidx9 = getelementptr inbounds [1024 x double], [1024 x double]* %C, i64 0, i64 %indvars.iv16
-  %1 = load double, double* %arrayidx9, align 8
-  %add = fadd double %0, %1
-  store double %add, double* %arrayidx9, align 8
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp ne i32 %lftr.wideiv, %nk
-  br i1 %exitcond, label %for.body.3, label %for.cond.1.for.inc.10_crit_edge
-
-for.cond.1.for.inc.10_crit_edge:                  ; preds = %for.body.3
-  br label %for.inc.10
-
-for.inc.10:                                       ; preds = %for.cond.1.for.inc.10_crit_edge, %for.cond.1.preheader
-  %indvars.iv.next17 = add nuw nsw i64 %indvars.iv16, 1
-  %exitcond18 = icmp ne i64 %indvars.iv.next17, 1024
-  br i1 %exitcond18, label %for.cond.1.preheader, label %for.end.12
-
-for.end.12:                                       ; preds = %for.inc.10
-  ret void
-}

diff  --git a/polly/test/ScheduleOptimizer/2012-04-16-Trivially-vectorizable-loops.ll b/polly/test/ScheduleOptimizer/2012-04-16-Trivially-vectorizable-loops.ll
deleted file mode 100644
index 8379da290769d..0000000000000
--- a/polly/test/ScheduleOptimizer/2012-04-16-Trivially-vectorizable-loops.ll
+++ /dev/null
@@ -1,204 +0,0 @@
-; RUN: opt %loadPolly -basic-aa -polly-opt-isl -polly-vectorizer=polly -disable-output < %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-%struct._IO_FILE = type { i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i64, i16, i8, [1 x i8], ptr, i64, ptr, ptr, ptr, ptr, i64, i32, [20 x i8] }
-%struct._IO_marker = type { ptr, ptr, i32 }
-
-@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
-@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
-@stdout = external global ptr
-@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
-@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
-@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
-
-define void @init_array() nounwind uwtable {
-entry:
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc17, %entry
-  %i.0 = phi i32 [ 0, %entry ], [ %inc18, %for.inc17 ]
-  %cmp = icmp slt i32 %i.0, 1536
-  br i1 %cmp, label %for.body, label %for.end19
-
-for.body:                                         ; preds = %for.cond
-  br label %for.cond1
-
-for.cond1:                                        ; preds = %for.inc, %for.body
-  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
-  %cmp2 = icmp slt i32 %j.0, 1536
-  br i1 %cmp2, label %for.body3, label %for.end
-
-for.body3:                                        ; preds = %for.cond1
-  %mul = mul nsw i32 %i.0, %j.0
-  %rem = srem i32 %mul, 1024
-  %add = add nsw i32 1, %rem
-  %conv = sitofp i32 %add to double
-  %div = fdiv double %conv, 2.000000e+00
-  %conv4 = fptrunc double %div to float
-  %idxprom = sext i32 %j.0 to i64
-  %idxprom5 = sext i32 %i.0 to i64
-  %arrayidx = getelementptr inbounds [1536 x [1536 x float]], ptr @A, i32 0, i64 %idxprom5
-  %arrayidx6 = getelementptr inbounds [1536 x float], ptr %arrayidx, i32 0, i64 %idxprom
-  store float %conv4, ptr %arrayidx6, align 4
-  %mul7 = mul nsw i32 %i.0, %j.0
-  %rem8 = srem i32 %mul7, 1024
-  %add9 = add nsw i32 1, %rem8
-  %conv10 = sitofp i32 %add9 to double
-  %div11 = fdiv double %conv10, 2.000000e+00
-  %conv12 = fptrunc double %div11 to float
-  %idxprom13 = sext i32 %j.0 to i64
-  %idxprom14 = sext i32 %i.0 to i64
-  %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], ptr @B, i32 0, i64 %idxprom14
-  %arrayidx16 = getelementptr inbounds [1536 x float], ptr %arrayidx15, i32 0, i64 %idxprom13
-  store float %conv12, ptr %arrayidx16, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body3
-  %inc = add nsw i32 %j.0, 1
-  br label %for.cond1
-
-for.end:                                          ; preds = %for.cond1
-  br label %for.inc17
-
-for.inc17:                                        ; preds = %for.end
-  %inc18 = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end19:                                        ; preds = %for.cond
-  ret void
-}
-
-define void @print_array() nounwind uwtable {
-entry:
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc10, %entry
-  %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
-  %cmp = icmp slt i32 %i.0, 1536
-  br i1 %cmp, label %for.body, label %for.end12
-
-for.body:                                         ; preds = %for.cond
-  br label %for.cond1
-
-for.cond1:                                        ; preds = %for.inc, %for.body
-  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
-  %cmp2 = icmp slt i32 %j.0, 1536
-  br i1 %cmp2, label %for.body3, label %for.end
-
-for.body3:                                        ; preds = %for.cond1
-  %0 = load ptr, ptr @stdout, align 8
-  %idxprom = sext i32 %j.0 to i64
-  %idxprom4 = sext i32 %i.0 to i64
-  %arrayidx = getelementptr inbounds [1536 x [1536 x float]], ptr @C, i32 0, i64 %idxprom4
-  %arrayidx5 = getelementptr inbounds [1536 x float], ptr %arrayidx, i32 0, i64 %idxprom
-  %1 = load float, ptr %arrayidx5, align 4
-  %conv = fpext float %1 to double
-  %call = call i32 (ptr, ptr, ...) @fprintf(ptr %0, ptr @.str, double %conv)
-  %rem = srem i32 %j.0, 80
-  %cmp6 = icmp eq i32 %rem, 79
-  br i1 %cmp6, label %if.then, label %if.end
-
-if.then:                                          ; preds = %for.body3
-  %2 = load ptr, ptr @stdout, align 8
-  %call8 = call i32 (ptr, ptr, ...) @fprintf(ptr %2, ptr @.str1)
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %for.body3
-  br label %for.inc
-
-for.inc:                                          ; preds = %if.end
-  %inc = add nsw i32 %j.0, 1
-  br label %for.cond1
-
-for.end:                                          ; preds = %for.cond1
-  %3 = load ptr, ptr @stdout, align 8
-  %call9 = call i32 (ptr, ptr, ...) @fprintf(ptr %3, ptr @.str1)
-  br label %for.inc10
-
-for.inc10:                                        ; preds = %for.end
-  %inc11 = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end12:                                        ; preds = %for.cond
-  ret void
-}
-
-declare i32 @fprintf(ptr, ptr, ...)
-
-define i32 @main() nounwind uwtable {
-entry:
-  call void @init_array()
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc28, %entry
-  %i.0 = phi i32 [ 0, %entry ], [ %inc29, %for.inc28 ]
-  %cmp = icmp slt i32 %i.0, 1536
-  br i1 %cmp, label %for.body, label %for.end30
-
-for.body:                                         ; preds = %for.cond
-  br label %for.cond1
-
-for.cond1:                                        ; preds = %for.inc25, %for.body
-  %j.0 = phi i32 [ 0, %for.body ], [ %inc26, %for.inc25 ]
-  %cmp2 = icmp slt i32 %j.0, 1536
-  br i1 %cmp2, label %for.body3, label %for.end27
-
-for.body3:                                        ; preds = %for.cond1
-  %idxprom = sext i32 %j.0 to i64
-  %idxprom4 = sext i32 %i.0 to i64
-  %arrayidx = getelementptr inbounds [1536 x [1536 x float]], ptr @C, i32 0, i64 %idxprom4
-  %arrayidx5 = getelementptr inbounds [1536 x float], ptr %arrayidx, i32 0, i64 %idxprom
-  store float 0.000000e+00, ptr %arrayidx5, align 4
-  br label %for.cond6
-
-for.cond6:                                        ; preds = %for.inc, %for.body3
-  %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
-  %cmp7 = icmp slt i32 %k.0, 1536
-  br i1 %cmp7, label %for.body8, label %for.end
-
-for.body8:                                        ; preds = %for.cond6
-  %idxprom9 = sext i32 %j.0 to i64
-  %idxprom10 = sext i32 %i.0 to i64
-  %arrayidx11 = getelementptr inbounds [1536 x [1536 x float]], ptr @C, i32 0, i64 %idxprom10
-  %arrayidx12 = getelementptr inbounds [1536 x float], ptr %arrayidx11, i32 0, i64 %idxprom9
-  %0 = load float, ptr %arrayidx12, align 4
-  %idxprom13 = sext i32 %k.0 to i64
-  %idxprom14 = sext i32 %i.0 to i64
-  %arrayidx15 = getelementptr inbounds [1536 x [1536 x float]], ptr @A, i32 0, i64 %idxprom14
-  %arrayidx16 = getelementptr inbounds [1536 x float], ptr %arrayidx15, i32 0, i64 %idxprom13
-  %1 = load float, ptr %arrayidx16, align 4
-  %idxprom17 = sext i32 %j.0 to i64
-  %idxprom18 = sext i32 %k.0 to i64
-  %arrayidx19 = getelementptr inbounds [1536 x [1536 x float]], ptr @B, i32 0, i64 %idxprom18
-  %arrayidx20 = getelementptr inbounds [1536 x float], ptr %arrayidx19, i32 0, i64 %idxprom17
-  %2 = load float, ptr %arrayidx20, align 4
-  %mul = fmul float %1, %2
-  %add = fadd float %0, %mul
-  %idxprom21 = sext i32 %j.0 to i64
-  %idxprom22 = sext i32 %i.0 to i64
-  %arrayidx23 = getelementptr inbounds [1536 x [1536 x float]], ptr @C, i32 0, i64 %idxprom22
-  %arrayidx24 = getelementptr inbounds [1536 x float], ptr %arrayidx23, i32 0, i64 %idxprom21
-  store float %add, ptr %arrayidx24, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body8
-  %inc = add nsw i32 %k.0, 1
-  br label %for.cond6
-
-for.end:                                          ; preds = %for.cond6
-  br label %for.inc25
-
-for.inc25:                                        ; preds = %for.end
-  %inc26 = add nsw i32 %j.0, 1
-  br label %for.cond1
-
-for.end27:                                        ; preds = %for.cond1
-  br label %for.inc28
-
-for.inc28:                                        ; preds = %for.end27
-  %inc29 = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end30:                                        ; preds = %for.cond
-  ret i32 0
-}

diff  --git a/polly/test/ScheduleOptimizer/prevectorization-without-tiling.ll b/polly/test/ScheduleOptimizer/prevectorization-without-tiling.ll
index 9089d45c1320e..fea2155b1e4ed 100644
--- a/polly/test/ScheduleOptimizer/prevectorization-without-tiling.ll
+++ b/polly/test/ScheduleOptimizer/prevectorization-without-tiling.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S %loadPolly -basic-aa -polly-tiling=false -polly-pattern-matching-based-opts=false -polly-vectorizer=polly -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s
+; RUN: opt -S %loadPolly -basic-aa -polly-tiling=false -polly-pattern-matching-based-opts=false -polly-vectorizer=stripmine -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 @C = common global [1536 x [1536 x float]] zeroinitializer, align 16

diff  --git a/polly/test/ScheduleOptimizer/prevectorization.ll b/polly/test/ScheduleOptimizer/prevectorization.ll
index 55943d6daf736..385ebf14712ab 100644
--- a/polly/test/ScheduleOptimizer/prevectorization.ll
+++ b/polly/test/ScheduleOptimizer/prevectorization.ll
@@ -1,6 +1,5 @@
-; RUN: opt -S %loadPolly -basic-aa -polly-pattern-matching-based-opts=false -polly-vectorizer=polly                         -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s
-; RUN: opt -S %loadPolly -basic-aa -polly-pattern-matching-based-opts=false -polly-vectorizer=stripmine                     -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s
-; RUN: opt -S %loadPolly -basic-aa -polly-pattern-matching-based-opts=false -polly-vectorizer=polly -polly-prevect-width=16 -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s -check-prefix=VEC16
+; RUN: opt -S %loadPolly -basic-aa -polly-pattern-matching-based-opts=false -polly-vectorizer=stripmine                         -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s
+; RUN: opt -S %loadPolly -basic-aa -polly-pattern-matching-based-opts=false -polly-vectorizer=stripmine -polly-prevect-width=16 -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s -check-prefix=VEC16
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 

diff  --git a/polly/test/ScheduleOptimizer/rectangular-tiling.ll b/polly/test/ScheduleOptimizer/rectangular-tiling.ll
index c254e4f367c4d..b527255ab5f7b 100644
--- a/polly/test/ScheduleOptimizer/rectangular-tiling.ll
+++ b/polly/test/ScheduleOptimizer/rectangular-tiling.ll
@@ -2,7 +2,6 @@
 ; RUN: opt %loadPolly -polly-tile-sizes=256,16 -polly-tiling=false                                                                                                                    -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s --check-prefix=NOTILING
 ; RUN: opt %loadPolly -polly-tile-sizes=256,16 -polly-2nd-level-tiling -polly-2nd-level-tile-sizes=16,8                                                                               -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s --check-prefix=TWOLEVEL
 ; RUN: opt %loadPolly -polly-tile-sizes=256,16 -polly-2nd-level-tiling -polly-2nd-level-tile-sizes=16,8 -polly-register-tiling                                                        -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s --check-prefix=TWO-PLUS-REGISTER
-; RUN: opt %loadPolly -polly-tile-sizes=256,16 -polly-2nd-level-tiling -polly-2nd-level-tile-sizes=16,8 -polly-register-tiling -polly-register-tile-sizes=2,4 -polly-vectorizer=polly -polly-opt-isl -polly-print-ast -disable-output < %s | FileCheck %s --check-prefix=TWO-PLUS-REGISTER-PLUS-VECTORIZATION
 
 ; CHECK: // 1st level tiling - Tiles
 ; CHECK: for (int c0 = 0; c0 <= 3; c0 += 1)
@@ -49,21 +48,6 @@
 ; TWO-PLUS-REGISTER:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1);
 ; TWO-PLUS-REGISTER:             }
 
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma known-parallel
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c0 = 0; c0 <= 3; c0 += 1)
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:   for (int c1 = 0; c1 <= 31; c1 += 1)
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:     for (int c2 = 0; c2 <= 15; c2 += 1)
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:       for (int c3 = 0; c3 <= 1; c3 += 1)
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:         for (int c4 = 0; c4 <= 7; c4 += 1)
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:           for (int c5 = 0; c5 <= 1; c5 += 1) {
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             // SIMD
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             for (int c8 = 0; c8 <= 3; c8 += 1)
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8);
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             // SIMD
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             for (int c8 = 0; c8 <= 3; c8 += 1)
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8);
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:           }
-
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
 
 ; Function Attrs: nounwind

diff  --git a/polly/test/ScheduleOptimizer/vec-addr-space.ll b/polly/test/ScheduleOptimizer/vec-addr-space.ll
deleted file mode 100644
index 563c37f256ca0..0000000000000
--- a/polly/test/ScheduleOptimizer/vec-addr-space.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: opt %loadPolly -polly-vectorizer=polly -polly-opt-isl -polly-codegen -S < %s | FileCheck %s
-;
-; Polly crashed during codegen with an assertion error while trying to generate
-; a pointer bitcast from a pointer having an address space to one without
-;
-; CHECK-LABEL: entry:
-; CHECK: load <4 x float>, ptr addrspace(4)
-;
-; ModuleID = '/tmp/lud.bc'
-source_filename = "lud.c"
-; This datalayout was for a 32-bit ARC processor with 512-bit vector extension
-target datalayout = "e-m:e-p:32:32-p1:32:32-p3:32:32-p5:32:32-i64:32-f64:32-v64:32-v128:32-a:0:32-v256:32-v512:32-n8:16:32"
-; Specify x86 because the ARC backend is still experimental and not built by default
-target triple = "x86_64-unknown-unknown"
-
-; Function Attrs: noinline nounwind
-define void @LU_decomp_kij_opt(i32 %n, i32 %lda, ptr addrspace(4) %A, ptr addrspace(4) %scratch) #0 {
-entry:
-  %cmp34 = icmp sgt i32 %n, 0
-  br i1 %cmp34, label %for.body.lr.ph, label %for.end34
-
-for.body.lr.ph:                                   ; preds = %entry
-  %0 = add nsw i32 %n, -1
-  br label %for.body
-
-for.body:                                         ; preds = %for.inc32, %for.body.lr.ph
-  %k.035 = phi i32 [ 0, %for.body.lr.ph ], [ %add2, %for.inc32 ]
-  %mul = mul nsw i32 %k.035, %lda
-  %add = add nsw i32 %mul, %k.035
-  %arrayidx = getelementptr inbounds float, ptr addrspace(4) %A, i32 %add
-  %1 = load float, ptr addrspace(4) %arrayidx, align 4
-  %conv1 = fdiv arcp float 1.000000e+00, %1
-  %add2 = add nuw nsw i32 %k.035, 1
-  %exitcond37 = icmp eq i32 %k.035, %0
-  br i1 %exitcond37, label %for.end34, label %for.body6.lr.ph
-
-for.body6.lr.ph:                                  ; preds = %for.body
-  br label %for.body6
-
-for.body6:                                        ; preds = %for.inc29, %for.body6.lr.ph
-  %i.033 = phi i32 [ %add2, %for.body6.lr.ph ], [ %inc30, %for.inc29 ]
-  %mul7 = mul nsw i32 %i.033, %lda
-  %add8 = add nsw i32 %mul7, %k.035
-  %arrayidx9 = getelementptr inbounds float, ptr addrspace(4) %A, i32 %add8
-  %2 = load float, ptr addrspace(4) %arrayidx9, align 4
-  %mul10 = fmul arcp contract float %conv1, %2
-  store float %mul10, ptr addrspace(4) %arrayidx9, align 4
-  br label %for.body18
-
-for.body18:                                       ; preds = %for.body18, %for.body6
-  %j.031 = phi i32 [ %add2, %for.body6 ], [ %inc, %for.body18 ]
-  %3 = load float, ptr addrspace(4) %arrayidx9, align 4
-  %add23 = add nsw i32 %j.031, %mul
-  %arrayidx24 = getelementptr inbounds float, ptr addrspace(4) %A, i32 %add23
-  %4 = load float, ptr addrspace(4) %arrayidx24, align 4
-  %mul25 = fmul arcp contract float %3, %4
-  %add27 = add nsw i32 %j.031, %mul7
-  %arrayidx28 = getelementptr inbounds float, ptr addrspace(4) %A, i32 %add27
-  %5 = load float, ptr addrspace(4) %arrayidx28, align 4
-  %sub = fsub arcp contract float %5, %mul25
-  store float %sub, ptr addrspace(4) %arrayidx28, align 4
-  %inc = add nuw nsw i32 %j.031, 1
-  %exitcond = icmp eq i32 %inc, %n
-  br i1 %exitcond, label %for.inc29, label %for.body18
-
-for.inc29:                                        ; preds = %for.body18
-  %inc30 = add nuw nsw i32 %i.033, 1
-  %exitcond36 = icmp eq i32 %inc30, %n
-  br i1 %exitcond36, label %for.inc32, label %for.body6
-
-for.inc32:                                        ; preds = %for.inc29
-  br label %for.body
-
-for.end34:                                        ; preds = %for.body, %entry
-  ret void
-}
-
-attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-
-!llvm.module.flags = !{!0, !1}
-!llvm.ident = !{!2}
-
-!0 = !{i32 1, !"ArcIntrinsicCheck", i32 18224056}
-!1 = !{i32 1, !"wchar_size", i32 2}
-!2 = !{!"clang version 10.0.1 "}