[llvm] 878309c - Revert "[LoopInterchange] New cost model for loop interchange"

Thu Jun 23 16:11:10 PDT 2022

Author: Evgenii Stepanov
Date: 2022-06-23T16:10:53-07:00
New Revision: 878309cc54f18ccabf4ead44c12ce5c0e2345306

URL: https://github.com/llvm/llvm-project/commit/878309cc54f18ccabf4ead44c12ce5c0e2345306
DIFF: https://github.com/llvm/llvm-project/commit/878309cc54f18ccabf4ead44c12ce5c0e2345306.diff

LOG: Revert "[LoopInterchange] New cost model for loop interchange"

llvm/lib/Analysis/LoopCacheAnalysis.cpp:702:30: runtime error: signed
integer overflow: 6148914691236517209 * 100 cannot be represented in
type 'long'

https://lab.llvm.org/buildbot/#/builders/5/builds/25185

This reverts commit 1b24fe34b06cd9f2337313f513a8b19f9a37c5de.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopInterchange.cpp
    llvm/test/Transforms/LICM/lnicm.ll
    llvm/test/Transforms/LoopInterchange/call-instructions.ll
    llvm/test/Transforms/LoopInterchange/currentLimitation.ll
    llvm/test/Transforms/LoopInterchange/debuginfo.ll
    llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll
    llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
    llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
    llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
    llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
    llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
    llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll
    llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
    llvm/test/Transforms/LoopInterchange/interchangeable.ll
    llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll
    llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
    llvm/test/Transforms/LoopInterchange/lcssa.ll
    llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
    llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
    llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
    llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
    llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll
    llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll
    llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll
    llvm/test/Transforms/LoopInterchange/phi-ordering.ll
    llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
    llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
    llvm/test/Transforms/LoopInterchange/pr43326.ll
    llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll
    llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll
    llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll
    llvm/test/Transforms/LoopInterchange/pr48212.ll
    llvm/test/Transforms/LoopInterchange/profitability.ll
    llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
    llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll
    llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 1d3023d04463f..ced4396d5e798 100644

--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -18,7 +18,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/Analysis/LoopCacheAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopNestAnalysis.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -359,10 +358,8 @@ class LoopInterchangeProfitability {
       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
 
   /// Check if the loop interchange is profitable.
-  bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
-                    unsigned InnerLoopId, unsigned OuterLoopId,
-                    CharMatrix &DepMatrix,
-                    const DenseMap<const Loop *, unsigned> &CostMap);
+  bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
+                    CharMatrix &DepMatrix);
 
 private:
   int getInstrOrderCost();
@@ -413,15 +410,13 @@ struct LoopInterchange {
   LoopInfo *LI = nullptr;
   DependenceInfo *DI = nullptr;
   DominatorTree *DT = nullptr;
-  std::unique_ptr<CacheCost> CC = nullptr;
 
   /// Interface to emit optimization remarks.
   OptimizationRemarkEmitter *ORE;
 
   LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
-                  DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
-                  OptimizationRemarkEmitter *ORE)
-      : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
+                  DominatorTree *DT, OptimizationRemarkEmitter *ORE)
+      : SE(SE), LI(LI), DI(DI), DT(DT), ORE(ORE) {}
 
   bool run(Loop *L) {
     if (L->getParentLoop())
@@ -504,21 +499,6 @@ struct LoopInterchange {
     }
 
     unsigned SelecLoopId = selectLoopForInterchange(LoopList);
-    // Obtain the loop vector returned from loop cache analysis beforehand,
-    // and put each <Loop, index> pair into a map for constant time query
-    // later. Indices in loop vector reprsent the optimal order of the
-    // corresponding loop, e.g., given a loopnest with depth N, index 0
-    // indicates the loop should be placed as the outermost loop and index N
-    // indicates the loop should be placed as the innermost loop.
-    //
-    // For the old pass manager CacheCost would be null.
-    DenseMap<const Loop *, unsigned> CostMap;
-    if (CC != nullptr) {
-      const auto &LoopCosts = CC->getLoopCosts();
-      for (unsigned i = 0; i < LoopCosts.size(); i++) {
-        CostMap[LoopCosts[i].first] = i;
-      }
-    }
     // We try to achieve the globally optimal memory access for the loopnest,
     // and do interchange based on a bubble-sort fasion. We start from
     // the innermost loop, move it outwards to the best possible position
@@ -527,7 +507,7 @@ struct LoopInterchange {
       bool ChangedPerIter = false;
       for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
         bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
-                                        DependencyMatrix, CostMap);
+                                        DependencyMatrix);
         if (!Interchanged)
           continue;
         // Loops interchanged, update LoopList accordingly.
@@ -551,8 +531,7 @@ struct LoopInterchange {
 
   bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
                    unsigned OuterLoopId,
-                   std::vector<std::vector<char>> &DependencyMatrix,
-                   const DenseMap<const Loop *, unsigned> &CostMap) {
+                   std::vector<std::vector<char>> &DependencyMatrix) {
     LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
                       << " and OuterLoopId = " << OuterLoopId << "\n");
     LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
@@ -562,8 +541,7 @@ struct LoopInterchange {
     }
     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
-    if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
-                          DependencyMatrix, CostMap)) {
+    if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
       return false;
     }
@@ -1157,33 +1135,21 @@ static bool isProfitableForVectorization(unsigned InnerLoopId,
   return !DepMatrix.empty();
 }
 
-bool LoopInterchangeProfitability::isProfitable(
-    const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
-    unsigned OuterLoopId, CharMatrix &DepMatrix,
-    const DenseMap<const Loop *, unsigned> &CostMap) {
-  // TODO: Remove the legacy cost model.
-
-  // This is the new cost model returned from loop cache analysis.
-  // A smaller index means the loop should be placed an outer loop, and vice
-  // versa.
-  if (CostMap.find(InnerLoop) != CostMap.end() &&
-      CostMap.find(OuterLoop) != CostMap.end()) {
-    unsigned InnerIndex = 0, OuterIndex = 0;
-    InnerIndex = CostMap.find(InnerLoop)->second;
-    OuterIndex = CostMap.find(OuterLoop)->second;
-    LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
-                      << ", OuterIndex = " << OuterIndex << "\n");
-    if (InnerIndex < OuterIndex)
-      return true;
-  } else {
-    // Legacy cost model: this is rough cost estimation algorithm. It counts the
-    // good and bad order of induction variables in the instruction and allows
-    // reordering if number of bad orders is more than good.
-    int Cost = getInstrOrderCost();
-    LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
-    if (Cost < -LoopInterchangeCostThreshold)
-      return true;
-  }
+bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
+                                                unsigned OuterLoopId,
+                                                CharMatrix &DepMatrix) {
+  // TODO: Add better profitability checks.
+  // e.g
+  // 1) Construct dependency matrix and move the one with no loop carried dep
+  //    inside to enable vectorization.
+
+  // This is rough cost estimation algorithm. It counts the good and bad order
+  // of induction variables in the instruction and allows reordering if number
+  // of bad orders is more than good.
+  int Cost = getInstrOrderCost();
+  LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
+  if (Cost < -LoopInterchangeCostThreshold)
+    return true;
 
   // It is not profitable as per current cache profitability model. But check if
   // we can move this loop outside to improve parallelism.
@@ -1194,8 +1160,10 @@ bool LoopInterchangeProfitability::isProfitable(
     return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
                                     InnerLoop->getStartLoc(),
                                     InnerLoop->getHeader())
-           << "Interchanging loops is too costly and it does not improve "
-              "parallelism.";
+           << "Interchanging loops is too costly (cost="
+           << ore::NV("Cost", Cost) << ", threshold="
+           << ore::NV("Threshold", LoopInterchangeCostThreshold)
+           << ") and it does not improve parallelism.";
   });
   return false;
 }
@@ -1741,8 +1709,8 @@ struct LoopInterchangeLegacyPass : public LoopPass {
     auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-    std::unique_ptr<CacheCost> CC = nullptr;
-    return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
+
+    return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
   }
 };
 } // namespace
@@ -1769,10 +1737,8 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
   Function &F = *LN.getParent();
 
   DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
-  std::unique_ptr<CacheCost> CC =
-      CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
   OptimizationRemarkEmitter ORE(&F);
-  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
+  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &ORE).run(LN))
     return PreservedAnalyses::all();
   return getLoopPassPreservedAnalyses();
 }

diff  --git a/llvm/test/Transforms/LICM/lnicm.ll b/llvm/test/Transforms/LICM/lnicm.ll
index 814f964666305..1ef0c1fd1421e 100644
--- a/llvm/test/Transforms/LICM/lnicm.ll
+++ b/llvm/test/Transforms/LICM/lnicm.ll
@@ -1,13 +1,12 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes INTC
-; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LNICM
-; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LICM
+; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)'       -S %s | FileCheck %s --check-prefixes INTC
+; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -S %s | FileCheck %s --check-prefixes LNICM,CHECK
+; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)'  -S %s | FileCheck %s --check-prefixes LICM,CHECK
 
 ; This test represents the following function:
-; void test(int n, int m, int x[m][n], int y[n], int *z) {
-;   for (int k = 0; k < n; k++) {
+; void test(int x[10][10], int y[10], int *z) {
+;   for (int k = 0; k < 10; k++) {
 ;     int tmp = *z;
-;     for (int i = 0; i < m; i++)
+;     for (int i = 0; i < 10; i++)
 ;       x[i][k] += y[k] + tmp;
 ;   }
 ; }
@@ -17,187 +16,82 @@
 ; perfect loop nest (e.g. loop-interchange) to perform.
 
 
-define dso_local void @test(i64 %n, i64 %m, ptr noalias %x, ptr noalias readonly %y, ptr readonly %z) {
-; The loopnest is not interchanged when we only run loop interchange.
-; INTC-LABEL: @test(
-; INTC-NEXT:  gurad:
-; INTC-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
-; INTC-NEXT:    [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
-; INTC-NEXT:    br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
-; INTC:       for.cond1.preheader.lr.ph:
-; INTC-NEXT:    br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
-; INTC:       for.i.preheader:
-; INTC-NEXT:    br label [[ENTRY:%.*]]
-; INTC:       entry:
-; INTC-NEXT:    br label [[FOR_BODY:%.*]]
-; INTC:       for.body:
-; INTC-NEXT:    [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
-; INTC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
-; INTC-NEXT:    br label [[FOR_BODY3:%.*]]
-; INTC:       for.body3:
-; INTC-NEXT:    [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
-; INTC-NEXT:    [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
-; INTC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
-; INTC-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; INTC-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
-; INTC-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
-; INTC-NEXT:    [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
-; INTC-NEXT:    [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
-; INTC-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
-; INTC-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
-; INTC-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
-; INTC-NEXT:    store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
-; INTC-NEXT:    [[INC]] = add nsw i32 [[I_01]], 1
-; INTC-NEXT:    [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
-; INTC-NEXT:    [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
-; INTC-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
-; INTC:       for.end:
-; INTC-NEXT:    [[INC10]] = add nsw i32 [[K_02]], 1
-; INTC-NEXT:    [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
-; INTC-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
-; INTC-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
-; INTC:       for.end11.loopexit:
-; INTC-NEXT:    br label [[FOR_END11]]
-; INTC:       for.end11:
-; INTC-NEXT:    ret void
-;
-; The loopnest is interchanged when we run lnicm and loop interchange.
-; LNICM-LABEL: @test(
-; LNICM-NEXT:  gurad:
-; LNICM-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
-; LNICM-NEXT:    [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
-; LNICM-NEXT:    br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
-; LNICM:       for.cond1.preheader.lr.ph:
-; LNICM-NEXT:    br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
-; LNICM:       for.i.preheader:
-; LNICM-NEXT:    br label [[FOR_BODY3_PREHEADER:%.*]]
-; LNICM:       entry:
-; LNICM-NEXT:    br label [[FOR_BODY:%.*]]
-; LNICM:       for.body:
-; LNICM-NEXT:    [[K_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
-; LNICM-NEXT:    br label [[FOR_BODY3_SPLIT1:%.*]]
-; LNICM:       for.body3.preheader:
-; LNICM-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
-; LNICM-NEXT:    br label [[FOR_BODY3:%.*]]
-; LNICM:       for.body3:
-; LNICM-NEXT:    [[I_01:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER]] ]
-; LNICM-NEXT:    br label [[ENTRY]]
-; LNICM:       for.body3.split1:
-; LNICM-NEXT:    [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
-; LNICM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
-; LNICM-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; LNICM-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
-; LNICM-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
-; LNICM-NEXT:    [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
-; LNICM-NEXT:    [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
-; LNICM-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
-; LNICM-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
-; LNICM-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
-; LNICM-NEXT:    store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
-; LNICM-NEXT:    [[INC:%.*]] = add nsw i32 [[I_01]], 1
-; LNICM-NEXT:    [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
-; LNICM-NEXT:    [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
-; LNICM-NEXT:    br label [[FOR_END]]
-; LNICM:       for.body3.split:
-; LNICM-NEXT:    [[TMP3]] = add nsw i32 [[I_01]], 1
-; LNICM-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
-; LNICM-NEXT:    [[TMP5:%.*]] = icmp slt i64 [[TMP4]], [[M]]
-; LNICM-NEXT:    br i1 [[TMP5]], label [[FOR_BODY3]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
-; LNICM:       for.end:
-; LNICM-NEXT:    [[INC10]] = add nsw i32 [[K_02]], 1
-; LNICM-NEXT:    [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
-; LNICM-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
-; LNICM-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_BODY3_SPLIT]], !llvm.loop [[LOOP2:![0-9]+]]
-; LNICM:       for.end11.loopexit:
-; LNICM-NEXT:    br label [[FOR_END11]]
-; LNICM:       for.end11:
-; LNICM-NEXT:    ret void
-;
-; The loopnest is not interchanged when we run licm and loop interchange.
-; LICM-LABEL: @test(
-; LICM-NEXT:  gurad:
-; LICM-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
-; LICM-NEXT:    [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
-; LICM-NEXT:    br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
-; LICM:       for.cond1.preheader.lr.ph:
-; LICM-NEXT:    br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
-; LICM:       for.i.preheader:
-; LICM-NEXT:    br label [[ENTRY:%.*]]
-; LICM:       entry:
-; LICM-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
-; LICM-NEXT:    br label [[FOR_BODY:%.*]]
-; LICM:       for.body:
-; LICM-NEXT:    [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
-; LICM-NEXT:    [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
-; LICM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
-; LICM-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; LICM-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
-; LICM-NEXT:    br label [[FOR_BODY3:%.*]]
-; LICM:       for.body3:
-; LICM-NEXT:    [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
-; LICM-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
-; LICM-NEXT:    [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
-; LICM-NEXT:    [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
-; LICM-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
-; LICM-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
-; LICM-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
-; LICM-NEXT:    store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
-; LICM-NEXT:    [[INC]] = add nsw i32 [[I_01]], 1
-; LICM-NEXT:    [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
-; LICM-NEXT:    [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
-; LICM-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
-; LICM:       for.end:
-; LICM-NEXT:    [[INC10]] = add nsw i32 [[K_02]], 1
-; LICM-NEXT:    [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
-; LICM-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
-; LICM-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
-; LICM:       for.end11.loopexit:
-; LICM-NEXT:    br label [[FOR_END11]]
-; LICM:       for.end11:
-; LICM-NEXT:    ret void
-;
+define dso_local void @test([10 x i32]* noalias %x, i32* noalias readonly %y, i32* readonly %z) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   [[Z:%.*]] = load i32, i32* %z, align 4
+; CHECK-NEXT:   br label [[FOR_BODY3_PREHEADER:%.*]]
+; LNICM:      for.body.preheader:
+; LICM-NOT:   for.body.preheader:
+; INTC-NOT:   for.body.preheader:
+; LNICM-NEXT:   br label [[FOR_BODY:%.*]]
+; CHECK:      for.body:
+; LNICM-NEXT:   [[K:%.*]] = phi i32 [ [[INC10:%.*]], [[FOR_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
+; LNICM-NEXT:   br label [[FOR_BODY3_SPLIT1:%.*]]
+; LICM:         [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
+; LNICM:      for.body3.preheader:
+; LICM-NOT:   for.body3.preheader:
+; INTC-NOT:   for.body3.preheader:
+; LNICM-NEXT:   br label [[FOR_BODY3:%.*]]
+; CHECK:      for.body3:
+; LNICM-NEXT:   [[I:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER:%.*]] ]
+; LNICM-NEXT:   br label [[FOR_BODY_PREHEADER:%.*]]
+; LNICM:      for.body3.split1:
+; LNICM-NEXT:   [[IDXPROM:%.*]] = sext i32 [[K:%.*]] to i64
+; LNICM-NEXT:   [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %y, i64 [[IDXPROM:%.*]]
+; LNICM-NEXT:   [[TMP:%.*]] = load i32, i32* [[ARRAYIDX:%.*]], align 4
+; LNICM-NEXT:   [[ADD:%.*]] = add nsw i32 [[TMP:%.*]], [[Z:%.*]]
+; LNICM-NEXT:   [[IDXPROM4:%.*]] = sext i32 [[I:%.*]] to i64
+; LNICM-NEXT:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 [[IDXPROM4:%.*]]
+; LNICM-NEXT:   [[IDXPROM6:%.*]] = sext i32 [[K:%.*]] to i64
+; LNICM-NEXT:   [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX5:%.*]], i64 0, i64 [[IDXPROM6:%.*]]
+; LNICM-NEXT:   [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX7:%.*]], align 4
+; LNICM-NEXT:   [[ADD8:%.*]] = add nsw i32 [[TMP2:%.*]], [[ADD:%.*]]
+; LNICM-NEXT:   store i32 [[ADD8:%.*]], i32* [[ARRAYIDX7:%.*]], align 4
+; LNICM-NEXT:   [[INC:%.*]] = add nsw i32 [[I:%.*]], 1
+; LNICM-NEXT:   [[CMP2:%.*]] = icmp slt i32 [[INC:%.*]], 10
+; LNICM-NEXT:   br label [[FOR_END:%.*]]
+; LNICM:      for.body3.split:
+; LICM-NOT:   for.body3.split:
+; INTC-NOT:   for.body3.split:
+; LNICM-NEXT:   [[TMP3:%.*]] = add nsw i32 [[I:%.*]], 1
+; LNICM-NEXT:   [[TMP4:%.*]] = icmp slt i32 [[TMP3:%.*]], 10
+; LNICM-NEXT:   br i1 [[TMP4:%.*]], label [[FOR_BODY3:%.*]], label [[FOR_END11:%.*]], !llvm.loop !0
+; LNICM:      for.end:
+; LNICM-NEXT:   [[INC10:%.*]] = add nsw i32 [[K:%.*]], 1
+; LNICM-NEXT:   [[CMP:%.*]] = icmp slt i32 [[INC10:%.*]], 10
+; LNICM-NEXT:   br i1 [[CMP:%.*]], label [[FOR_BODY:%.*]], label [[FOR_BODY3_SPLIT:%.*]], !llvm.loop !2
+; LNICM:      for.end11:
+; LNICM-NEXT:   ret void
 
-gurad:
-  %cmp23 = icmp sgt i64 %m, 0
-  %cmp32 = icmp sgt i64 %n, 0
-  br i1 %cmp23, label %for.cond1.preheader.lr.ph, label %for.end11
-
-for.cond1.preheader.lr.ph:                        ; preds = %gurad
-  br i1 %cmp32, label %for.i.preheader, label %for.end11
-
-for.i.preheader:                                  ; preds = %for.cond1.preheader.lr.ph
-  br label %entry
-
-entry:                                  ; preds = %for.i.preheader
+entry:
   br label %for.body
 
 for.body:
   %k.02 = phi i32 [ 0, %entry ], [ %inc10, %for.end ]
-  %0 = load i32, ptr %z, align 4
+  %0 = load i32, i32* %z, align 4
   br label %for.body3
 
 for.body3:
   %i.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
   %idxprom = sext i32 %k.02 to i64
-  %arrayidx = getelementptr inbounds i32, ptr %y, i64 %idxprom
-  %1 = load i32, ptr %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
+  %1 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %1, %0
   %idxprom4 = sext i32 %i.01 to i64
-  %index0 = mul i64 %idxprom4, %n
-  %index1 = add i64 %index0, %idxprom
-  %arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %index1
-  %2 = load i32, ptr %arrayidx7, align 4
+  %arrayidx5 = getelementptr inbounds [10 x i32], [10 x i32]* %x, i64 %idxprom4
+  %idxprom6 = sext i32 %k.02 to i64
+  %arrayidx7 = getelementptr inbounds [10 x i32], [10 x i32]* %arrayidx5, i64 0, i64 %idxprom6
+  %2 = load i32, i32* %arrayidx7, align 4
   %add8 = add nsw i32 %2, %add
-  store i32 %add8, ptr %arrayidx7, align 4
+  store i32 %add8, i32* %arrayidx7, align 4
   %inc = add nsw i32 %i.01, 1
-  %inc.ext = sext i32 %inc to i64
-  %cmp2 = icmp slt i64 %inc.ext, %m
+  %cmp2 = icmp slt i32 %inc, 10
   br i1 %cmp2, label %for.body3, label %for.end, !llvm.loop !0
 
 for.end:
   %inc10 = add nsw i32 %k.02, 1
-  %inc10.ext = sext i32 %inc10 to i64
-  %cmp = icmp slt i64 %inc10.ext, %n
+  %cmp = icmp slt i32 %inc10, 10
   br i1 %cmp, label %for.body, label %for.end11, !llvm.loop !2
 
 for.end11:

diff  --git a/llvm/test/Transforms/LoopInterchange/call-instructions.ll b/llvm/test/Transforms/LoopInterchange/call-instructions.ll
index 40f5bf4e3aba0..d945fe6d720c9 100644
--- a/llvm/test/Transforms/LoopInterchange/call-instructions.ll
+++ b/llvm/test/Transforms/LoopInterchange/call-instructions.ll
@@ -1,9 +1,10 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
 ; RUN:     -verify-dom-info -verify-loop-info -stats 2>&1 | FileCheck -check-prefix=STATS %s
 ; RUN: FileCheck --input-file=%t %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @A = common global [100 x [100 x i32]] zeroinitializer
 

diff  --git a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
index f2b4cabb26615..82c16555f44f9 100644
--- a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
+++ b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
@@ -1,14 +1,15 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' \
 ; RUN:   -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
 ; RUN: FileCheck --input-file=%t %s
 
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' \
 ; RUN:   -da-disable-delinearization-checks -pass-remarks-output=%t             \
 ; RUN:   -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
 ; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
+target triple = "x86_64-unknown-linux-gnu"
+ 
 @A = common global [100 x [100 x i32]] zeroinitializer
 @B = common global [100 x [100 x [100 x i32]]] zeroinitializer
 @C = common global [100 x [100 x i64]] zeroinitializer

diff  --git a/llvm/test/Transforms/LoopInterchange/debuginfo.ll b/llvm/test/Transforms/LoopInterchange/debuginfo.ll
index 6a55a4fed0d19..e2187b9328142 100644
--- a/llvm/test/Transforms/LoopInterchange/debuginfo.ll
+++ b/llvm/test/Transforms/LoopInterchange/debuginfo.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks='loop-interchange' -pass-remarks-output=%t -S \
 ; RUN:     -verify-dom-info -verify-loop-info | FileCheck %s
 ; RUN: FileCheck -check-prefix=REMARK --input-file=%t %s
 
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @A = common global [100 x [100 x i64]] zeroinitializer
 

diff  --git a/llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll b/llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll
index 936591f85f525..5e3c59e803011 100644
--- a/llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll
+++ b/llvm/test/Transforms/LoopInterchange/inner-indvar-depend-on-outer-indvar.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -S -debug 2>&1 | FileCheck %s
 
 @A = common global [100 x [100 x i64]] zeroinitializer

diff  --git a/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll b/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
index af84da4ce084f..d0eac6d65e826 100644
--- a/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
+++ b/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
 ; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
 ; RUN: FileCheck --input-file=%t %s
 

diff  --git a/llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll b/llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
index 2bf1237931bf5..65f1290101ca2 100644
--- a/llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
+++ b/llvm/test/Transforms/LoopInterchange/innermost-latch-uses-values-in-middle-header.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -S -debug 2>&1 | FileCheck %s
 
 @a = common global i32 0, align 4

diff  --git a/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll b/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
index 9997f4180843d..23f7d912ffa23 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
@@ -1,8 +1,9 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @A = common global [100 x [100 x i32]] zeroinitializer
 @B = common global [100 x i32] zeroinitializer

diff  --git a/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll b/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
index 0e5e69c028e0c..fa2f112e71198 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s
 
 @A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16

diff  --git a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
index 8ea2e47b2a07c..f8e2d6b45a196 100644
--- a/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchange-no-deps.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -loop-interchange -cache-line-size=64 -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -pass-remarks-output=%t \
+; RUN: opt < %s -loop-interchange -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -pass-remarks-output=%t \
 ; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange -stats -S 2>&1 \
 ; RUN:     | FileCheck -check-prefix=STATS %s
 ; RUN: FileCheck -input-file %t %s
@@ -34,6 +34,35 @@ exit:                                 ; preds = %for1.inc
 
 }
 
+; Only the inner loop induction variable is used for memory accesses.
+; Interchanging is not beneficial.
+; CHECK:      Name:       InterchangeNotProfitable
+; CHECK-NEXT: Function:   no_bad_order
+define i32 @no_bad_order(i32* %Arr) {
+entry:
+  br label %for1.header
+
+for1.header:                                         ; preds = %entry, %for1.inc
+  %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for1.inc ]
+  br label %for2
+
+for2:                                        ; preds = %for1.header, %for2
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next, %for2 ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %Arr, i64 %indvars.iv
+  store i32 0, i32* %arrayidx6, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for2, label %for1.inc
+
+for1.inc:
+  %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+  %exitcond21 = icmp ne i64 %indvars.iv.next20, 1024
+  br i1 %exitcond21, label %for1.header, label %exit
+
+exit:                                 ; preds = %for1.inc
+  ret i32 0
+}
+
 ; No memory access using any induction variables, interchanging not beneficial.
 ; CHECK:      Name:        InterchangeNotProfitable
 ; CHECK-NEXT: Function:    no_mem_instrs

diff  --git a/llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll b/llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll
index c771a040f0b7a..830d0a26c9507 100644
--- a/llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
 
 @b = common dso_local local_unnamed_addr global [200 x [200 x i32]] zeroinitializer, align 4
 @a = common dso_local local_unnamed_addr global i32 0, align 4

diff  --git a/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
index 6148dc5d83b49..3b12cf443cb9a 100644
--- a/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s --basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s --basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
 
 @b = constant [200 x [100 x i32]] zeroinitializer, align 4
 @a = constant i32 0, align 4

diff  --git a/llvm/test/Transforms/LoopInterchange/interchangeable.ll b/llvm/test/Transforms/LoopInterchange/interchangeable.ll
index 74c953389c84b..00261aa99b032 100644
--- a/llvm/test/Transforms/LoopInterchange/interchangeable.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchangeable.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
-; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @A = common global [100 x [100 x i64]] zeroinitializer
 @B = common global [100 x i64] zeroinitializer

diff  --git a/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll b/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll
index a33b0a8c9a799..fa5db55ba5cf5 100644
--- a/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll
+++ b/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-3.ll
@@ -1,8 +1,9 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @D = common global [100 x [100 x [100 x i32]]] zeroinitializer
 
@@ -23,31 +24,31 @@ entry:
   br label %for.cond1.preheader
 
 for.cond1.preheader:                              ; preds = %for.inc15, %entry
-  %i.028 = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ]
+  %i.028 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ]
   br label %for.cond4.preheader
 
 for.cond4.preheader:                              ; preds = %for.inc12, %for.cond1.preheader
-  %j.027 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
+  %j.027 = phi i32 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
   br label %for.body6
 
 for.body6:                                        ; preds = %for.body6, %for.cond4.preheader
-  %k.026 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
-  %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i64 0, i64 %k.026, i64 %j.027, i64 %i.028
+  %k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+  %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %k.026, i32 %j.027, i32 %i.028
   %0 = load i32, i32* %arrayidx8
   %add = add nsw i32 %0, %t
   store i32 %add, i32* %arrayidx8
-  %inc = add nuw nsw i64 %k.026, 1
-  %exitcond = icmp eq i64 %inc, 100
+  %inc = add nuw nsw i32 %k.026, 1
+  %exitcond = icmp eq i32 %inc, 100
   br i1 %exitcond, label %for.inc12, label %for.body6
 
 for.inc12:                                        ; preds = %for.body6
-  %inc13 = add nuw nsw i64 %j.027, 1
-  %exitcond29 = icmp eq i64 %inc13, 100
+  %inc13 = add nuw nsw i32 %j.027, 1
+  %exitcond29 = icmp eq i32 %inc13, 100
   br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader
 
 for.inc15:                                        ; preds = %for.inc12
-  %inc16 = add nuw nsw i64 %i.028, 1
-  %exitcond30 = icmp eq i64 %inc16, 100
+  %inc16 = add nuw nsw i32 %i.028, 1
+  %exitcond30 = icmp eq i32 %inc16, 100
   br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
 
 for.end17:                                        ; preds = %for.inc15

diff  --git a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
index a188f0e064eb8..ab616eea97402 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck %s
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -da-disable-delinearization-checks -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck -check-prefix=CHECK-DELIN %s
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -basic-aa -loop-interchange -da-disable-delinearization-checks -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -S | FileCheck -check-prefix=CHECK-DELIN %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 

diff  --git a/llvm/test/Transforms/LoopInterchange/lcssa.ll b/llvm/test/Transforms/LoopInterchange/lcssa.ll
index 92db58650b467..9809689bb0494 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa.ll
@@ -1,7 +1,8 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t -S
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t -S
 ; RUN: FileCheck --input-file %t --check-prefix REMARK %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @A = common global [100 x [100 x i32]] zeroinitializer
 @C = common global [100 x [100 x i32]] zeroinitializer

diff  --git a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
index 9ef25826579c3..388f237db347d 100644
--- a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
+++ b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@@ -1,11 +1,11 @@
 ; Test optimization remarks generated by the LoopInterchange pass.
 ;
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
 ; RUN:     -pass-remarks='loop-interchange' -S
 ; RUN: cat %t |  FileCheck %s
 
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
 ; RUN:     -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
 ; RUN: cat %t |  FileCheck --check-prefix=DELIN %s
@@ -71,7 +71,11 @@ for.end19:
 ; DELIN-NEXT: Name:            InterchangeNotProfitable
 ; DELIN-NEXT: Function:        test01
 ; DELIN-NEXT: Args:
-; DELIN-NEXT:   - String:          Interchanging loops is too costly and it does not improve parallelism.
+; DELIN-NEXT:   - String:          'Interchanging loops is too costly (cost='
+; DELIN-NEXT:   - Cost:            '2'
+; DELIN-NEXT:   - String:          ', threshold='
+; DELIN-NEXT:   - Threshold:       '0'
+; DELIN-NEXT:   - String:          ') and it does not improve parallelism.'
 ; DELIN-NEXT: ...
 
 ;;--------------------------------------Test case 02------------------------------------

diff  --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
index c827a8df19475..3bf1e214cfefb 100644
--- a/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
+++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
@@ -1,8 +1,9 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @A = common global [100 x [100 x i32]] zeroinitializer
 @B = common global [100 x i32] zeroinitializer

diff  --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
index 876f001440e38..46aa4b5b4e76b 100644
--- a/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
+++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
@@ -1,8 +1,9 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @D = common global [100 x [100 x [100 x i32]]] zeroinitializer
 
@@ -23,31 +24,31 @@ entry:
   br label %for.cond1.preheader
 
 for.cond1.preheader:                              ; preds = %for.inc15, %entry
-  %i.028 = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ]
+  %i.028 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ]
   br label %for.cond4.preheader
 
 for.cond4.preheader:                              ; preds = %for.inc12, %for.cond1.preheader
-  %j.027 = phi i64 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
+  %j.027 = phi i32 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
   br label %for.body6
 
 for.body6:                                        ; preds = %for.body6, %for.cond4.preheader
-  %k.026 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
-  %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i64 %i.028, i64 %k.026, i64 %j.027
+  %k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+  %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
   %0 = load i32, i32* %arrayidx8
   %add = add nsw i32 %0, %t
   store i32 %add, i32* %arrayidx8
-  %inc = add nuw nsw i64 %k.026, 1
-  %exitcond = icmp eq i64 %inc, 100
+  %inc = add nuw nsw i32 %k.026, 1
+  %exitcond = icmp eq i32 %inc, 100
   br i1 %exitcond, label %for.inc12, label %for.body6
 
 for.inc12:                                        ; preds = %for.body6
-  %inc13 = add nuw nsw i64 %j.027, 1
-  %exitcond29 = icmp eq i64 %inc13, 100
+  %inc13 = add nuw nsw i32 %j.027, 1
+  %exitcond29 = icmp eq i32 %inc13, 100
   br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader
 
 for.inc15:                                        ; preds = %for.inc12
-  %inc16 = add nuw nsw i64 %i.028, 1
-  %exitcond30 = icmp eq i64 %inc16, 100
+  %inc16 = add nuw nsw i32 %i.028, 1
+  %exitcond30 = icmp eq i32 %inc16, 100
   br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
 
 for.end17:                                        ; preds = %for.inc15

diff  --git a/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll b/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
index f09c679bc5bf9..82f661502f573 100644
--- a/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
+++ b/llvm/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
@@ -1,8 +1,9 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
 ; RUN:     -S -debug 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @A = common global [100 x [100 x i32]] zeroinitializer
 @B = common global [100 x i32] zeroinitializer
@@ -107,13 +108,13 @@ for.end12:
 ;; The outer loop header does not branch to the inner loop preheader, or the
 ;; inner loop header, or the outer loop latch.
 ; CHECK: Not interchanging loops. Cannot prove legality.
-define void @interchange_07(i32 %k, i32 %N, i64 %ny) {
+define void @interchange_07(i32 %k, i32 %N, i32 %ny) {
 entry:
   br label %for1.header
 
 for1.header:
-  %j23 = phi i64 [ 0, %entry ], [ %j.next24, %for1.inc10 ]
-  %cmp21 = icmp slt i64 0, %ny
+  %j23 = phi i32 [ 0, %entry ], [ %j.next24, %for1.inc10 ]
+  %cmp21 = icmp slt i32 0, %ny
   br label %singleSucc
 
 singleSucc:
@@ -123,18 +124,18 @@ preheader.j:
   br label %for2
 
 for2:
-  %j = phi i64 [ %j.next, %for2 ], [ 0, %preheader.j ]
-  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %j, i64 %j23
+  %j = phi i32 [ %j.next, %for2 ], [ 0, %preheader.j ]
+  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i32 0, i32 %j, i32 %j23
   %lv = load i32, i32* %arrayidx5
   %add = add nsw i32 %lv, %k
   store i32 %add, i32* %arrayidx5
-  %j.next = add nuw nsw i64 %j, 1
-  %exitcond = icmp eq i64 %j, 99
+  %j.next = add nuw nsw i32 %j, 1
+  %exitcond = icmp eq i32 %j, 99
   br i1 %exitcond, label %for1.inc10, label %for2
 
 for1.inc10:
-  %j.next24 = add nuw nsw i64 %j23, 1
-  %exitcond26 = icmp eq i64 %j23, 99
+  %j.next24 = add nuw nsw i32 %j23, 1
+  %exitcond26 = icmp eq i32 %j23, 99
   br i1 %exitcond26, label %for.end12, label %for1.header
 
 for.end12:

diff  --git a/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll b/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll
index bdf46ca8b2e6f..3a7c82b83bfc6 100644
--- a/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll
+++ b/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basic-aa -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-loop-lcssa -S %s | FileCheck %s
+; RUN: opt -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S %s | FileCheck %s
 
 @b = global [3 x [5 x [8 x i16]]] [[5 x [8 x i16]] zeroinitializer, [5 x [8 x i16]] [[8 x i16] zeroinitializer, [8 x i16] [i16 0, i16 0, i16 0, i16 6, i16 1, i16 6, i16 0, i16 0], [8 x i16] zeroinitializer, [8 x i16] zeroinitializer, [8 x i16] zeroinitializer], [5 x [8 x i16]] zeroinitializer], align 2
 @a = common global i32 0, align 4

diff  --git a/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll b/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll
index 3dd291a0ab9c5..7e9db5a4d4e59 100644
--- a/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll
+++ b/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
 ; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
 ; RUN: FileCheck --input-file=%t %s
 

diff  --git a/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll b/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll
index 653c97d8c07c7..77546020bf95f 100644
--- a/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll
+++ b/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-interchange -cache-line-size=64 -loop-interchange-threshold=-100 -verify-loop-lcssa -S | FileCheck %s
+; RUN: opt < %s -loop-interchange -loop-interchange-threshold=-100 -verify-loop-lcssa -S | FileCheck %s
 
 ; Test case for PR41725. The induction variables in the latches escape the
 ; loops and we must move some PHIs around.

diff  --git a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll
index c6fb18db9bbb3..78e333b83d3a4 100644
--- a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll
+++ b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll
@@ -1,9 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=0 -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=0 -S 2>&1 | FileCheck %s
 ;; Checks the order of the inner phi nodes does not cause havoc.
 ;; The inner loop has a reduction into c. The IV is not the first phi.
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv8--linux-gnueabihf"
 
 
 
@@ -30,7 +31,7 @@ define void @test(i32 %T, [90 x i32]* noalias nocapture %C, [90 x [90 x i16]]* n
 ; CHECK-NEXT:    br label [[FOR1_HEADER_PREHEADER]]
 ; CHECK:       for3.split1:
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* [[A:%.*]], i32 [[ADD]], i32 [[I]], i32 [[J]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* [[A:%.*]], i32 [[ADD]], i32 [[J]], i32 [[I]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1
 ; CHECK-NEXT:    store i16 [[ADD15]], i16* [[ARRAYIDX]]
@@ -69,7 +70,7 @@ for2.header:                                  ; preds = %for2.inc16, %for1.heade
 for3:                                        ; preds = %for3, %for2.header
   %k = phi i32 [ 1, %for2.header ], [ %inc, %for3 ]
   %add = add nsw i32 %k, %mul
-  %arrayidx = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* %A, i32 %add, i32 %i, i32 %j
+  %arrayidx = getelementptr inbounds [90 x [90 x i16]], [90 x [90 x i16]]* %A, i32 %add, i32 %j, i32 %i
   %0 = load i16, i16* %arrayidx, align 2
   %add15 = add nsw i16 %0, 1
   store i16 %add15, i16* %arrayidx

diff  --git a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
index ff3e6bc3c18f0..ef0c5ad6cb058 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -cache-line-size=64 -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s
+; RUN: opt -loop-interchange -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s
 
 @b = external dso_local global [5 x i32], align 16
 

diff  --git a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
index 48773d0254aae..443f61fbe2a0a 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
 ; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1
 ; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
 

diff  --git a/llvm/test/Transforms/LoopInterchange/pr43326.ll b/llvm/test/Transforms/LoopInterchange/pr43326.ll
index bf0da367130a3..68bcc4e74cecc 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43326.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43326.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
 ; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1
 ; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
 

diff  --git a/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll
index 5f598e34a7aef..9ae5f42e1ae7e 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -cache-line-size=64 -S < %s | FileCheck %s
+; RUN: opt -loop-interchange -S < %s | FileCheck %s
 
 ; Test cases for PR43473.
 

diff  --git a/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll b/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll
index fcb5ca6602241..445fffecc2f37 100644
--- a/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr43797-lcssa-for-multiple-outer-loop-blocks.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -cache-line-size=64 -verify-loop-lcssa -S %s | FileCheck %s
+; RUN: opt -loop-interchange -verify-loop-lcssa -S %s | FileCheck %s
 
 ; Tests for PR43797.
 

diff  --git a/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll b/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll
index 2cc6ce960de42..7336477615249 100644
--- a/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -cache-line-size=64 -S %s | FileCheck %s
+; RUN: opt -loop-interchange -S %s | FileCheck %s
 
 @global = external local_unnamed_addr global [400 x [400 x i32]], align 16
 

diff  --git a/llvm/test/Transforms/LoopInterchange/pr48212.ll b/llvm/test/Transforms/LoopInterchange/pr48212.ll
index fb18ece4e71b5..b6894bc0ba302 100644
--- a/llvm/test/Transforms/LoopInterchange/pr48212.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr48212.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
 ; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1
 ; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
 

diff  --git a/llvm/test/Transforms/LoopInterchange/profitability.ll b/llvm/test/Transforms/LoopInterchange/profitability.ll
index 983bbd6e83ad2..79706d8afe6a0 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability.ll
@@ -1,10 +1,11 @@
-; RUN: opt < %s -loop-interchange -cache-line-size=64 -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
+; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
 ; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
 ; RUN: FileCheck -input-file %t %s
 
 ;; We test profitability model in these test cases.
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @A = common global [100 x [100 x i32]] zeroinitializer
 @B = common global [100 x [100 x i32]] zeroinitializer

diff  --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
index 225b4bbcbc106..89b734d5f82fa 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -basic-aa -loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
 ; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
 ; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
 
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 ; REMARKS: --- !Passed
 ; REMARKS-NEXT: Pass:            loop-interchange

diff  --git a/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll b/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll
index 5599a093cd4f3..3f178443ee6d1 100644
--- a/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll
+++ b/llvm/test/Transforms/LoopInterchange/update-condbranch-duplicate-successors.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -cache-line-size=64 -S %s | FileCheck %s
+; RUN: opt -loop-interchange -S %s | FileCheck %s
 
 
 @global = external dso_local global [1000 x [1000 x i32]], align 16

diff  --git a/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll b/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
index 587b63861da34..3bc69c3d2b0ff 100644
--- a/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
+++ b/llvm/test/Transforms/LoopInterchange/vector-gep-operand.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-interchange -cache-line-size=64 -loop-interchange-threshold=-10 -S %s | FileCheck %s
+; RUN: opt -loop-interchange -loop-interchange-threshold=-10 -S %s | FileCheck %s
 
 ; The test contains a GEP with an operand that is not SCEV-able. Make sure
 ; loop-interchange does not crash.