[llvm] r358552 - Revert "Temporarily Revert "Add basic loop fusion pass.""

Tue Apr 16 21:53:01 PDT 2019

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll?rev=358552&view=auto
==============================================================================

--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: opt < %s -loop-reduce -mcpu=btver2  -S | FileCheck %s --check-prefix=JAG
+; RUN: opt < %s -loop-reduce -mcpu=bdver2  -S | FileCheck %s --check-prefix=BUL
+; RUN: opt < %s -loop-reduce -mcpu=haswell -S | FileCheck %s --check-prefix=HSW
+
+; RUN: llc < %s                     | FileCheck %s --check-prefix=BASE
+; RUN: llc < %s -mattr=macrofusion  | FileCheck %s --check-prefix=FUSE
+; RUN: llc < %s -mattr=branchfusion | FileCheck %s --check-prefix=FUSE
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; PR35681 - https://bugs.llvm.org/show_bug.cgi?id=35681
+; FIXME: If a CPU can macro-fuse a compare and branch, then we discount that
+; cost in LSR and avoid generating large offsets in each memory access.
+; This reduces code size and may improve decode throughput.
+
+define void @maxArray(double* noalias nocapture %x, double* noalias nocapture readonly %y) {
+; JAG-LABEL: @maxArray(
+; JAG-NEXT:  entry:
+; JAG-NEXT:    [[Y1:%.*]] = bitcast double* [[Y:%.*]] to i8*
+; JAG-NEXT:    [[X3:%.*]] = bitcast double* [[X:%.*]] to i8*
+; JAG-NEXT:    br label [[VECTOR_BODY:%.*]]
+; JAG:       vector.body:
+; JAG-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ]
+; JAG-NEXT:    [[UGLYGEP7:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; JAG-NEXT:    [[UGLYGEP78:%.*]] = bitcast i8* [[UGLYGEP7]] to <2 x double>*
+; JAG-NEXT:    [[SCEVGEP9:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP78]], i64 32768
+; JAG-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y1]], i64 [[LSR_IV]]
+; JAG-NEXT:    [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>*
+; JAG-NEXT:    [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP2]], i64 32768
+; JAG-NEXT:    [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP9]], align 8
+; JAG-NEXT:    [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP]], align 8
+; JAG-NEXT:    [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
+; JAG-NEXT:    [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
+; JAG-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]]
+; JAG-NEXT:    [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to <2 x double>*
+; JAG-NEXT:    [[SCEVGEP6:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP45]], i64 32768
+; JAG-NEXT:    store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP6]], align 8
+; JAG-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16
+; JAG-NEXT:    [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; JAG-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; JAG:       exit:
+; JAG-NEXT:    ret void
+;
+; BUL-LABEL: @maxArray(
+; BUL-NEXT:  entry:
+; BUL-NEXT:    br label [[VECTOR_BODY:%.*]]
+; BUL:       vector.body:
+; BUL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; BUL-NEXT:    [[SCEVGEP4:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[INDEX]]
+; BUL-NEXT:    [[SCEVGEP45:%.*]] = bitcast double* [[SCEVGEP4]] to <2 x double>*
+; BUL-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[Y:%.*]], i64 [[INDEX]]
+; BUL-NEXT:    [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to <2 x double>*
+; BUL-NEXT:    [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP45]], align 8
+; BUL-NEXT:    [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP1]], align 8
+; BUL-NEXT:    [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
+; BUL-NEXT:    [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
+; BUL-NEXT:    [[SCEVGEP2:%.*]] = getelementptr double, double* [[X]], i64 [[INDEX]]
+; BUL-NEXT:    [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to <2 x double>*
+; BUL-NEXT:    store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP23]], align 8
+; BUL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; BUL-NEXT:    [[DONE:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
+; BUL-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; BUL:       exit:
+; BUL-NEXT:    ret void
+;
+; HSW-LABEL: @maxArray(
+; HSW-NEXT:  entry:
+; HSW-NEXT:    br label [[VECTOR_BODY:%.*]]
+; HSW:       vector.body:
+; HSW-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; HSW-NEXT:    [[SCEVGEP4:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[INDEX]]
+; HSW-NEXT:    [[SCEVGEP45:%.*]] = bitcast double* [[SCEVGEP4]] to <2 x double>*
+; HSW-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[Y:%.*]], i64 [[INDEX]]
+; HSW-NEXT:    [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to <2 x double>*
+; HSW-NEXT:    [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP45]], align 8
+; HSW-NEXT:    [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP1]], align 8
+; HSW-NEXT:    [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]]
+; HSW-NEXT:    [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]]
+; HSW-NEXT:    [[SCEVGEP2:%.*]] = getelementptr double, double* [[X]], i64 [[INDEX]]
+; HSW-NEXT:    [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to <2 x double>*
+; HSW-NEXT:    store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP23]], align 8
+; HSW-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; HSW-NEXT:    [[DONE:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
+; HSW-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; HSW:       exit:
+; HSW-NEXT:    ret void
+;
+; BASE-LABEL: maxArray:
+; BASE:       # %bb.0: # %entry
+; BASE-NEXT:    movq $-524288, %rax # imm = 0xFFF80000
+; BASE-NEXT:    .p2align 4, 0x90
+; BASE-NEXT:  .LBB0_1: # %vector.body
+; BASE-NEXT:    # =>This Inner Loop Header: Depth=1
+; BASE-NEXT:    movupd 524288(%rdi,%rax), %xmm0
+; BASE-NEXT:    movupd 524288(%rsi,%rax), %xmm1
+; BASE-NEXT:    maxpd %xmm0, %xmm1
+; BASE-NEXT:    movupd %xmm1, 524288(%rdi,%rax)
+; BASE-NEXT:    addq $16, %rax
+; BASE-NEXT:    jne .LBB0_1
+; BASE-NEXT:  # %bb.2: # %exit
+; BASE-NEXT:    retq
+; FUSE-LABEL: maxArray:
+; FUSE:       # %bb.0: # %entry
+; FUSE-NEXT:    xorl %eax, %eax
+; FUSE-NEXT:    .p2align 4, 0x90
+; FUSE-NEXT:  .LBB0_1: # %vector.body
+; FUSE-NEXT:    # =>This Inner Loop Header: Depth=1
+; FUSE-NEXT:    movupd (%rdi,%rax,8), %xmm0
+; FUSE-NEXT:    movupd (%rsi,%rax,8), %xmm1
+; FUSE-NEXT:    maxpd %xmm0, %xmm1
+; FUSE-NEXT:    movupd %xmm1, (%rdi,%rax,8)
+; FUSE-NEXT:    addq $2, %rax
+; FUSE-NEXT:    cmpq $65536, %rax # imm = 0x10000
+; FUSE-NEXT:    jne .LBB0_1
+; FUSE-NEXT:  # %bb.2: # %exit
+; FUSE-NEXT:    retq
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]  ; element index into %x and %y, starting at 0
+  %gepx = getelementptr inbounds double, double* %x, i64 %index
+  %gepy = getelementptr inbounds double, double* %y, i64 %index
+  %xptr = bitcast double* %gepx to <2 x double>*  ; reinterpret the address as a two-element vector pointer
+  %yptr = bitcast double* %gepy to <2 x double>*
+  %xval = load <2 x double>, <2 x double>* %xptr, align 8
+  %yval = load <2 x double>, <2 x double>* %yptr, align 8
+  %cmp = fcmp ogt <2 x double> %yval, %xval  ; per lane: is %yval ordered-greater-than %xval
+  %max = select <2 x i1> %cmp, <2 x double> %yval, <2 x double> %xval  ; keep %yval where it is greater, else %xval
+  %xptr_again = bitcast double* %gepx to <2 x double>*  ; same address as %xptr, re-derived for the store
+  store <2 x double> %max, <2 x double>* %xptr_again, align 8  ; result is written back over %x
+  %index.next = add i64 %index, 2  ; each <2 x double> access covers two doubles
+  %done = icmp eq i64 %index.next, 65536  ; loop exits once the index reaches 65536
+  br i1 %done, label %exit, label %vector.body
+
+exit:
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+; Check that when an outer-loop induction variable is used inside an inner-loop
+; induction value expr, LSR can still choose a single induction variable
+; for the inner loop and share it across multiple induction value exprs.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP215:%.*]] = icmp sgt i32 [[SIZE:%.*]], 1
+; CHECK-NEXT:    [[T0:%.*]] = zext i32 [[SIZE]] to i64
+; CHECK-NEXT:    [[T1:%.*]] = sext i32 [[NSTEPS:%.*]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[T0]], -1
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[FOR_INC:%.*]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]]
+; CHECK:       for.body2.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
+; CHECK:       for.body2:
+; CHECK-NEXT:    [[LSR_IV3:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY2]] ], [ [[TMP0]], [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 1
+; CHECK-NEXT:    [[V1:%.*]] = load i8, i8* [[SCEVGEP6]], align 1
+; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[TMP0]]
+; CHECK-NEXT:    [[V2:%.*]] = load i8, i8* [[SCEVGEP5]], align 1
+; CHECK-NEXT:    [[TMPV:%.*]] = xor i8 [[V1]], [[V2]]
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, i8* [[LSR_IV3]], i64 [[LSR_IV1]]
+; CHECK-NEXT:    store i8 [[TMPV]], i8* [[SCEVGEP4]], align 1
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV3]], i64 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_INC_LOOPEXIT:%.*]]
+; CHECK:       for.inc.loopexit:
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV1]], [[T0]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT3]], [[T1]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp215 = icmp sgt i32 %size, 1  ; guards entry to the inner loop (see the branch in %for.body)
+  %t0 = zext i32 %size to i64
+  %t1 = sext i32 %nsteps to i64  ; outer-loop bound
+  %sub2 = sub i64 %t0, 2  ; %size - 2 as i64, added to the inner IV for the second load
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %indvars.iv2 = phi i64 [ %indvars.iv.next3, %for.inc ], [ 0, %entry ]
+  %t2 = mul nsw i64 %indvars.iv2, %t0  ; outer IV * %size: the outer-loop term used by the inner-loop store address
+  br i1 %cmp215, label %for.body2.preheader, label %for.inc
+
+for.body2.preheader:                              ; preds = %for.body
+  br label %for.body2
+
+; Check that LSR generates only two induction variables for for.body2: one for
+; the compare and one shared by multiple array accesses.
+
+for.body2:                                        ; preds = %for.body2.preheader, %for.body2
+  %indvars.iv = phi i64 [ 1, %for.body2.preheader ], [ %indvars.iv.next, %for.body2 ]
+  %arrayidx1 = getelementptr inbounds i8, i8* %maxarray, i64 %indvars.iv
+  %v1 = load i8, i8* %arrayidx1, align 1
+  %idx2 = add nsw i64 %indvars.iv, %sub2
+  %arrayidx2 = getelementptr inbounds i8, i8* %maxarray, i64 %idx2
+  %v2 = load i8, i8* %arrayidx2, align 1
+  %tmpv = xor i8 %v1, %v2
+  %t4 = add nsw i64 %t2, %indvars.iv  ; combines the outer-loop term %t2 with the inner IV
+  %add.ptr = getelementptr inbounds i8, i8* %maxarray, i64 %t4
+  store i8 %tmpv, i8* %add.ptr, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %size to i64  ; same expression as %t0, recomputed inside the loop
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body2, label %for.inc.loopexit
+
+for.inc.loopexit:                                 ; preds = %for.body2
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc.loopexit, %for.body
+  %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
+  %cmp = icmp slt i64 %indvars.iv.next3, %t1
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.inc
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; RUN: opt -S -loop-reduce -mcpu=corei7-avx -mtriple=x86_64-apple-macosx < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @indvar_expansion(i8* nocapture readonly %rowsptr) {
+entry:
+  br label %for.cond
+
+; SCEVExpander used to create induction variables in the loop %for.cond while
+; expanding the recurrence start value of loop strength reduced values from
+; %vector.body.
+
+; CHECK-LABEL: indvar_expansion
+; CHECK: for.cond:
+; CHECK-NOT: phi i3
+; CHECK: br i1 {{.+}}, label %for.cond
+
+for.cond:
+  %indvars.iv44 = phi i64 [ %indvars.iv.next45, %for.cond ], [ 0, %entry ]
+  %cmp = icmp eq i8 undef, 0  ; loop condition is undef-based and does not depend on the IV
+  %indvars.iv.next45 = add nuw nsw i64 %indvars.iv44, 1
+  br i1 %cmp, label %for.cond, label %for.cond2
+
+for.cond2:
+  br i1 undef, label %for.cond2, label %for.body14.lr.ph
+
+for.body14.lr.ph:
+  %sext = shl i64 %indvars.iv44, 32
+  %0 = ashr exact i64 %sext, 32  ; shl/ashr by 32: sign-extends the low 32 bits of %indvars.iv44
+  %1 = sub i64 undef, %indvars.iv44
+  %2 = and i64 %1, 4294967295  ; low 32 bits of %1
+  %3 = add i64 %2, 1
+  %fold = add i64 %1, 1
+  %n.mod.vf = and i64 %fold, 7
+  %n.vec = sub i64 %3, %n.mod.vf
+  %end.idx.rnd.down = add i64 %n.vec, %0  ; exit bound compared against %index.next below
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ]  ; start value %0 is derived from the IV of %for.cond
+  %4 = getelementptr inbounds i8, i8* %rowsptr, i64 %index
+  %5 = bitcast i8* %4 to <4 x i8>*
+  %wide.load = load <4 x i8>, <4 x i8>* %5, align 1
+  %index.next = add i64 %index, 8
+  %6 = icmp eq i64 %index.next, %end.idx.rnd.down
+  br i1 %6, label %for.end24, label %vector.body
+
+for.end24:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr17473.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr17473.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr17473.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't normalize IV if it can't be denormalized to the original
+; expression.  In this testcase, the normalized expression was denormalized to
+; an expression different from the original, and we were losing sign extension.
+
+; CHECK:    [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8
+; CHECK:     {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32
+
+@j = common global i32 0, align 4  ; loaded in @main's entry block
+@c = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i8 0, align 1  ; written in @main's entry block and again in %for.end
+@d = common global i32 0, align 4
+@i = common global i32 0, align 4
+@e = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1  ; format string passed to printf in @main
+@a = common global i32 0, align 4
+@b = common global i16 0, align 2
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {
+entry:
+  store i8 0, i8* @h, align 1
+  %0 = load i32, i32* @j, align 4
+  %tobool.i = icmp eq i32 %0, 0
+  %1 = load i32, i32* @d, align 4
+  %cmp3 = icmp sgt i32 %1, -1
+  %.lobit = lshr i32 %1, 31  ; isolates the sign bit of the value loaded from @d
+  %.lobit.not = xor i32 %.lobit, 1
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %fn3.exit
+  %inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ]  ; i8 induction variable
+  %conv = sext i8 %inc9 to i32  ; sign extension of the pre-increment IV; used after the loop in %for.end
+  br i1 %tobool.i, label %fn3.exit, label %land.rhs.i
+
+land.rhs.i:                                       ; preds = %for.body
+  store i32 0, i32* @c, align 4
+  br label %fn3.exit
+
+fn3.exit:                                         ; preds = %for.body, %land.rhs.i
+  %inc = add i8 %inc9, 1
+  %cmp = icmp sgt i8 %inc, -1  ; continue while %inc is non-negative as a signed i8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %fn3.exit
+  %.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0
+  store i32 %conv, i32* @g, align 4
+  store i32 %.lobit.not., i32* @i, align 4
+  store i8 %inc, i8* @h, align 1
+  %conv7 = sext i8 %inc to i32  ; sign extension of the post-increment IV
+  %add = add nsw i32 %conv7, %conv
+  store i32 %add, i32* @e, align 4
+  %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
+  ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr28719.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr28719.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr28719.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr28719.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global i32 0, align 4  ; loaded in @main's entry block
+@b = global i8 0, align 1  ; loaded in %preheader
+@c = global [4 x i8] zeroinitializer, align 1  ; indexed by %iv in %lor.false
+
+; Just make sure we don't generate code with uses not dominated by defs.
+; CHECK-LABEL: @main(
+define i32 @main() {
+entry:
+  %a0 = load i32, i32* @a, align 4
+  %cmpa = icmp slt i32 %a0, 4
+  br i1 %cmpa, label %preheader, label %for.end  ; the loop is entered only when the value loaded from @a is < 4
+
+preheader:
+  %b0 = load i8, i8* @b, align 1
+  %b0sext = sext i8 %b0 to i64
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %lor.false ]  ; back-edge value comes from %lor.false
+  %mul = mul nsw i64 %b0sext, %iv
+  %multrunc = trunc i64 %mul to i32
+  %cmp = icmp eq i32 %multrunc, 0
+  br i1 %cmp, label %lor.false, label %if.then
+
+lor.false:
+  %cgep = getelementptr inbounds [4 x i8], [4 x i8]* @c, i64 0, i64 %iv
+  %ci = load i8, i8* %cgep, align 1
+  %cisext = sext i8 %ci to i32
+  %ivtrunc = trunc i64 %iv to i32
+  %cmp2 = icmp eq i32 %cisext, %ivtrunc
+  %iv.next = add i64 %iv, 1  ; the increment is defined here, not in the loop header
+  br i1 %cmp2, label %for.body, label %if.then
+
+if.then:
+  tail call void @abort()
+  unreachable
+
+for.end:
+  ret i32 0
+}
+
+declare void @abort()

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr40514.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr40514.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr40514.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @pluto(i32 %arg) #0 {
+; CHECK-LABEL: @pluto(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB10:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i64 [[LSR_IV_NEXT2:%.*]], i64 addrspace(1)* undef, align 8
+; CHECK-NEXT:    ret i32 [[LSR_IV_NEXT:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2]], [[BB10]] ], [ 9, [[BB:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT]], [[BB10]] ], [ undef, [[BB]] ]
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV1]], 1
+; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB10]]
+;
+
+bb:
+  br label %bb10
+
+bb1:                                              ; preds = %bb10
+  %tmp = and i64 %tmp24, 4294967295  ; low 32 bits of the incremented IV from %bb10
+  %tmp2 = shl i64 %tmp23, 33
+  %tmp3 = ashr exact i64 %tmp2, 32
+  %tmp4 = add i64 undef, %tmp
+  %tmp5 = add i64 %tmp4, %tmp3
+  %tmp6 = add i64 %tmp5, undef
+  %tmp7 = add i64 %tmp6, undef
+  %tmp8 = add i64 undef, %tmp7
+  store i64 %tmp8, i64 addrspace(1)* undef, align 8
+  %tmp9 = trunc i64 %tmp7 to i32
+  ret i32 %tmp9
+
+bb10:                                             ; preds = %bb10, %bb
+  %tmp11 = phi i64 [ 9, %bb ], [ %tmp24, %bb10 ]  ; i64 induction variable starting at 9
+  %tmp12 = shl i64 undef, 1  ; start of a shl/mul chain rooted at undef, ending in %tmp23
+  %tmp13 = mul i64 %tmp12, %tmp12
+  %tmp14 = shl i64 %tmp13, 1
+  %tmp15 = mul i64 %tmp14, %tmp14
+  %tmp16 = shl i64 %tmp15, 1
+  %tmp17 = mul i64 %tmp16, %tmp16
+  %tmp18 = shl i64 %tmp17, 1
+  %tmp19 = mul i64 %tmp18, %tmp18
+  %tmp20 = shl i64 %tmp19, 1
+  %tmp21 = mul i64 %tmp20, %tmp20
+  %tmp22 = shl i64 %tmp21, 1
+  %tmp23 = mul i64 %tmp22, %tmp22
+  %tmp24 = add nuw nsw i64 %tmp11, 1
+  br i1 undef, label %bb1, label %bb10
+}
+
+
+attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,97 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; It is very bad to allow an LSR formula to contain a SCEVAddRecExpr Reg
+; from siblings of the current loop. When one loop is LSR optimized, it can
+; insert lsr.iv for other sibling loops, which sometimes leads to many extra
+; lsr.iv values being inserted for those loops.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@cond = common local_unnamed_addr global i64 0, align 8  ; loaded by every loop in @foo to compute its exit condition
+
+; Check there is no extra lsr.iv generated in foo.
+; CHECK-LABEL: @foo(
+; CHECK-NOT: lsr.iv{{[0-9]+}} =
+;
+define void @foo(i64 %N) local_unnamed_addr {
+entry:
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %do.body ]  ; counter of the first loop, starting at 0
+  tail call void @goo(i64 %i.0, i64 %i.0)
+  %inc = add nuw nsw i64 %i.0, 1
+  %t0 = load i64, i64* @cond, align 8
+  %tobool = icmp eq i64 %t0, 0
+  br i1 %tobool, label %do.body2.preheader, label %do.body
+
+do.body2.preheader:                               ; preds = %do.body
+  br label %do.body2
+
+do.body2:                                         ; preds = %do.body2.preheader, %do.body2
+  %i.1 = phi i64 [ %inc3, %do.body2 ], [ 0, %do.body2.preheader ]
+  %j.1 = phi i64 [ %inc4, %do.body2 ], [ %inc, %do.body2.preheader ]  ; starts from %inc of the preceding sibling loop
+  tail call void @goo(i64 %i.1, i64 %j.1)
+  %inc3 = add nuw nsw i64 %i.1, 1
+  %inc4 = add nsw i64 %j.1, 1
+  %t1 = load i64, i64* @cond, align 8
+  %tobool6 = icmp eq i64 %t1, 0
+  br i1 %tobool6, label %do.body8.preheader, label %do.body2
+
+do.body8.preheader:                               ; preds = %do.body2
+  br label %do.body8
+
+do.body8:                                         ; preds = %do.body8.preheader, %do.body8
+  %i.2 = phi i64 [ %inc9, %do.body8 ], [ 0, %do.body8.preheader ]
+  %j.2 = phi i64 [ %inc10, %do.body8 ], [ %inc4, %do.body8.preheader ]  ; starts from %inc4 of the preceding sibling loop
+  tail call void @goo(i64 %i.2, i64 %j.2)
+  %inc9 = add nuw nsw i64 %i.2, 1
+  %inc10 = add nsw i64 %j.2, 1
+  %t2 = load i64, i64* @cond, align 8
+  %tobool12 = icmp eq i64 %t2, 0
+  br i1 %tobool12, label %do.body14.preheader, label %do.body8
+
+do.body14.preheader:                              ; preds = %do.body8
+  br label %do.body14
+
+do.body14:                                        ; preds = %do.body14.preheader, %do.body14
+  %i.3 = phi i64 [ %inc15, %do.body14 ], [ 0, %do.body14.preheader ]
+  %j.3 = phi i64 [ %inc16, %do.body14 ], [ %inc10, %do.body14.preheader ]  ; starts from %inc10 of the preceding sibling loop
+  tail call void @goo(i64 %i.3, i64 %j.3)
+  %inc15 = add nuw nsw i64 %i.3, 1
+  %inc16 = add nsw i64 %j.3, 1
+  %t3 = load i64, i64* @cond, align 8
+  %tobool18 = icmp eq i64 %t3, 0
+  br i1 %tobool18, label %do.body20.preheader, label %do.body14
+
+do.body20.preheader:                              ; preds = %do.body14
+  br label %do.body20
+
+do.body20:                                        ; preds = %do.body20.preheader, %do.body20
+  %i.4 = phi i64 [ %inc21, %do.body20 ], [ 0, %do.body20.preheader ]
+  %j.4 = phi i64 [ %inc22, %do.body20 ], [ %inc16, %do.body20.preheader ]  ; starts from %inc16 of the preceding sibling loop
+  tail call void @goo(i64 %i.4, i64 %j.4)
+  %inc21 = add nuw nsw i64 %i.4, 1
+  %inc22 = add nsw i64 %j.4, 1
+  %t4 = load i64, i64* @cond, align 8
+  %tobool24 = icmp eq i64 %t4, 0
+  br i1 %tobool24, label %do.body26.preheader, label %do.body20
+
+do.body26.preheader:                              ; preds = %do.body20
+  br label %do.body26
+
+do.body26:                                        ; preds = %do.body26.preheader, %do.body26
+  %i.5 = phi i64 [ %inc27, %do.body26 ], [ 0, %do.body26.preheader ]
+  %j.5 = phi i64 [ %inc28, %do.body26 ], [ %inc22, %do.body26.preheader ]  ; starts from %inc22 of the preceding sibling loop
+  tail call void @goo(i64 %i.5, i64 %j.5)
+  %inc27 = add nuw nsw i64 %i.5, 1
+  %inc28 = add nsw i64 %j.5, 1
+  %t5 = load i64, i64* @cond, align 8
+  %tobool30 = icmp eq i64 %t5, 0
+  br i1 %tobool30, label %do.end31, label %do.body26
+
+do.end31:                                         ; preds = %do.body26
+  ret void
+}
+
+declare void @goo(i64, i64) local_unnamed_addr
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,88 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; CHECK: bb1:
+; CHECK: load double, double addrspace(1)* [[IV:%[^,]+]]
+; CHECK: store double {{.*}}, double addrspace(1)* [[IV]]
+
+; CHECK-NOT: cast
+; Make sure the GEP has the right index type
+; CHECK: getelementptr double, double addrspace(1)* [[IV]], i16 1
+; CHECK: br {{.*}} label %bb1
+
+; Make sure the GEP has the right index type
+; CHECK: getelementptr double, double addrspace(1)* {{.*}}, i16
+
+
+; This test tests several things. The load and store should use the
+; same address instead of having it computed twice, and SCEVExpander should
+; be able to reconstruct the full getelementptr, despite it having a few
+; obstacles set in its way.
+; We only check that the inner loop (bb1-bb2) is "reduced" because LSR
+; currently only operates on inner loops.
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64"
+
+define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind {
+entry:
+	%tmp = icmp sgt i64 %n, 0		; <i1> [#uses=1]
+	br i1 %tmp, label %bb.nph3, label %return
+
+bb.nph:		; preds = %bb2.preheader
+	%tmp1 = mul i64 %tmp16, %i.02		; <i64> [#uses=1]
+	%tmp2 = mul i64 %tmp19, %i.02		; <i64> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb2, %bb.nph
+	%j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ]		; <i64> [#uses=3]
+	%tmp3 = add i64 %j.01, %tmp1		; <i64> [#uses=1]
+	%tmp4 = add i64 %j.01, %tmp2		; <i64> [#uses=1]
+        %z0 = add i64 %tmp3, 5203		; load index = %tmp3 + 5203
+	%tmp5 = getelementptr double, double addrspace(1)* %p, i64 %z0		; <double addrspace(1)*> [#uses=1]
+	%tmp6 = load double, double addrspace(1)* %tmp5, align 8		; <double> [#uses=1]
+	%tmp7 = fdiv double %tmp6, 2.100000e+00		; <double> [#uses=1]
+        %z1 = add i64 %tmp4, 5203		; store index = %tmp4 + 5203
+	%tmp8 = getelementptr double, double addrspace(1)* %p, i64 %z1		; <double addrspace(1)*> [#uses=1]
+	store double %tmp7, double addrspace(1)* %tmp8, align 8		; %tmp8 and %tmp5 are built from identical expressions (%tmp16 and %tmp19 are the same product)
+	%tmp9 = add i64 %j.01, 1		; <i64> [#uses=2]
+	br label %bb2
+
+bb2:		; preds = %bb1
+	%tmp10 = icmp slt i64 %tmp9, %m		; <i1> [#uses=1]
+	br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+
+bb2.bb3_crit_edge:		; preds = %bb2
+	br label %bb3
+
+bb3:		; preds = %bb2.preheader, %bb2.bb3_crit_edge
+	%tmp11 = add i64 %i.02, 1		; <i64> [#uses=2]
+	br label %bb4
+
+bb4:		; preds = %bb3
+	%tmp12 = icmp slt i64 %tmp11, %n		; <i1> [#uses=1]
+	br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+
+bb4.return_crit_edge:		; preds = %bb4
+	br label %bb4.return_crit_edge.split
+
+bb4.return_crit_edge.split:		; preds = %bb.nph3, %bb4.return_crit_edge
+	br label %return
+
+bb.nph3:		; preds = %entry
+	%tmp13 = icmp sgt i64 %m, 0		; <i1> [#uses=1]
+	%tmp14 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp15 = mul i64 %tmp14, %o		; <i64> [#uses=1]
+	%tmp16 = mul i64 %tmp15, %q		; <i64> [#uses=1]
+	%tmp17 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp18 = mul i64 %tmp17, %o		; <i64> [#uses=1]
+	%tmp19 = mul i64 %tmp18, %q		; <i64> [#uses=1]
+	br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+
+bb.nph3.split:		; preds = %bb.nph3
+	br label %bb2.preheader
+
+bb2.preheader:		; preds = %bb.nph3.split, %bb4
+	%i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ]		; <i64> [#uses=3]
+	br i1 true, label %bb.nph, label %bb3		; constant-true condition: control always goes to %bb.nph
+
+return:		; preds = %bb4.return_crit_edge.split, %entry
+	ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/addrec-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/addrec-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/addrec-gep.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/addrec-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,82 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; CHECK: bb1:
+; CHECK: load double, double* [[IV:%[^,]+]]
+; CHECK: store double {{.*}}, double* [[IV]]
+; CHECK: getelementptr double, double*
+; CHECK-NOT: cast
+; CHECK: br {{.*}} label %bb1
+
+; This test tests several things. The load and store should use the
+; same address instead of having it computed twice, and SCEVExpander should
+; be able to reconstruct the full getelementptr, despite it having a few
+; obstacles set in its way.
+; We only check that the inner loop (bb1-bb2) is "reduced" because LSR
+; currently only operates on inner loops.
+
+target datalayout = "e-p:64:64:64-n32:64"
+
+define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
+entry:
+	%tmp = icmp sgt i64 %n, 0		; <i1> [#uses=1]
+	br i1 %tmp, label %bb.nph3, label %return
+
+bb.nph:		; preds = %bb2.preheader
+	%tmp1 = mul i64 %tmp16, %i.02		; <i64> [#uses=1]
+	%tmp2 = mul i64 %tmp19, %i.02		; <i64> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb2, %bb.nph
+	%j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ]		; <i64> [#uses=3]
+	%tmp3 = add i64 %j.01, %tmp1		; <i64> [#uses=1]
+	%tmp4 = add i64 %j.01, %tmp2		; <i64> [#uses=1]
+        %z0 = add i64 %tmp3, 5203
+	%tmp5 = getelementptr double, double* %p, i64 %z0		; <double*> [#uses=1]
+	%tmp6 = load double, double* %tmp5, align 8		; <double> [#uses=1]
+	%tmp7 = fdiv double %tmp6, 2.100000e+00		; <double> [#uses=1]
+        %z1 = add i64 %tmp4, 5203
+	%tmp8 = getelementptr double, double* %p, i64 %z1		; <double*> [#uses=1]
+	store double %tmp7, double* %tmp8, align 8
+	%tmp9 = add i64 %j.01, 1		; <i64> [#uses=2]
+	br label %bb2
+
+bb2:		; preds = %bb1
+	%tmp10 = icmp slt i64 %tmp9, %m		; <i1> [#uses=1]
+	br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+
+bb2.bb3_crit_edge:		; preds = %bb2
+	br label %bb3
+
+bb3:		; preds = %bb2.preheader, %bb2.bb3_crit_edge
+	%tmp11 = add i64 %i.02, 1		; <i64> [#uses=2]
+	br label %bb4
+
+bb4:		; preds = %bb3
+	%tmp12 = icmp slt i64 %tmp11, %n		; <i1> [#uses=1]
+	br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+
+bb4.return_crit_edge:		; preds = %bb4
+	br label %bb4.return_crit_edge.split
+
+bb4.return_crit_edge.split:		; preds = %bb.nph3, %bb4.return_crit_edge
+	br label %return
+
+bb.nph3:		; preds = %entry
+	%tmp13 = icmp sgt i64 %m, 0		; <i1> [#uses=1]
+	%tmp14 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp15 = mul i64 %tmp14, %o		; <i64> [#uses=1]
+	%tmp16 = mul i64 %tmp15, %q		; <i64> [#uses=1]
+	%tmp17 = mul i64 %n, 37		; <i64> [#uses=1]
+	%tmp18 = mul i64 %tmp17, %o		; <i64> [#uses=1]
+	%tmp19 = mul i64 %tmp18, %q		; <i64> [#uses=1]
+	br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+
+bb.nph3.split:		; preds = %bb.nph3
+	br label %bb2.preheader
+
+bb2.preheader:		; preds = %bb.nph3.split, %bb4
+	%i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ]		; <i64> [#uses=3]
+	br i1 true, label %bb.nph, label %bb3
+
+return:		; preds = %bb4.return_crit_edge.split, %entry
+	ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/address-space-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/address-space-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/address-space-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/address-space-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
+; should be able to form pretty GEPs.
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Copy of uglygep with a different address space
+; This tests that expandAddToGEP uses the right, smaller integer type for
+; another address space.
+define void @Z4() nounwind {
+; CHECK-LABEL: @Z4(
+bb:
+  br label %bb3
+
+bb1:                                              ; preds = %bb3
+  br i1 undef, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %t = add i16 %t4, 1                         ; <i16> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ]      ; <i16> [#uses=4]
+  br label %bb1
+
+; CHECK: bb10:
+; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0
+; Hoist %t2 computation outside the loop.
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8, i8 addrspace(1)* undef, i16 %t4
+; CHECK-NEXT: br label %bb14
+bb10:                                             ; preds = %bb1
+  %t7 = icmp eq i16 %t4, 0                    ; <i1> [#uses=0]
+  %t3 = add i16 %t4, 16                     ; <i16> [#uses=1]
+  br label %bb14
+
+; CHECK: bb14:
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
+; CHECK-NEXT: %t6 = load float addrspace(1)*, float addrspace(1)* addrspace(1)* undef
+; Fold %t3's add within the address.
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float, float addrspace(1)* %t6, i16 4
+; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
+; Use the induction variable (%t4) to access the right element
+; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8, i8 addrspace(1)* [[SCEVGEP2]], i16 %t4
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]]
+; CHECK-NEXT: br label %bb14
+bb14:                                             ; preds = %bb14, %bb10
+  %t2 = getelementptr inbounds i8, i8 addrspace(1)* undef, i16 %t4 ; <i8 addrspace(1)*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t2
+  %t6 = load float addrspace(1)*, float addrspace(1)* addrspace(1)* undef
+  %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)*              ; <i8 addrspace(1)*> [#uses=1]
+  %t9 = getelementptr inbounds i8, i8 addrspace(1)* %t8, i16 %t3 ; <i8 addrspace(1)*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t9
+  br label %bb14
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/count-to-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/count-to-zero.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/count-to-zero.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/count-to-zero.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; rdar://7382068
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+define void @t(i32 %c) nounwind optsize {
+entry:
+  br label %bb6
+
+bb1:                                              ; preds = %bb6
+  %tmp = icmp eq i32 %c_addr.1, 20                ; <i1> [#uses=1]
+  br i1 %tmp, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb1
+  %tmp1 = tail call i32 @f20(i32 %c_addr.1) nounwind ; <i32> [#uses=1]
+  br label %bb7
+
+bb3:                                              ; preds = %bb1
+  %tmp2 = icmp slt i32 %c_addr.1, 10              ; <i1> [#uses=1]
+  %tmp3 = add nsw i32 %c_addr.1, 1                ; <i32> [#uses=1]
+  %tmp4 = add i32 %c_addr.1, -1                   ; <i32> [#uses=1]
+  %c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
+  %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
+; CHECK: add nsw i32 %lsr.iv, -1
+  br label %bb6
+
+bb6:                                              ; preds = %bb3, %entry
+  %indvar = phi i32 [ %indvar.next, %bb3 ], [ 0, %entry ] ; <i32> [#uses=2]
+  %c_addr.1 = phi i32 [ %c_addr.1.be, %bb3 ], [ %c, %entry ] ; <i32> [#uses=7]
+  %tmp5 = icmp eq i32 %indvar, 9999               ; <i1> [#uses=1]
+; CHECK: icmp eq i32 %lsr.iv, 0
+  %tmp6 = icmp eq i32 %c_addr.1, 100              ; <i1> [#uses=1]
+  %or.cond = or i1 %tmp5, %tmp6                   ; <i1> [#uses=1]
+  br i1 %or.cond, label %bb7, label %bb1
+
+bb7:                                              ; preds = %bb6, %bb2
+  %c_addr.0 = phi i32 [ %tmp1, %bb2 ], [ %c_addr.1, %bb6 ] ; <i32> [#uses=1]
+  tail call void @bar(i32 %c_addr.0) nounwind
+  ret void
+}
+
+declare i32 @f20(i32)
+
+declare void @bar(i32)

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/dead-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/dead-phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/dead-phi.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/dead-phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt < %s -loop-reduce -S | grep phi | count 1
+
+define void @foo(i32 %n) {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+
+  ; These three instructions form an isolated cycle and can be deleted.
+  %j = phi i32 [ 0, %entry ], [ %j.y, %loop ]
+  %j.x = add i32 %j, 1
+  %j.y = mul i32 %j.x, 2
+
+  %i.next = add i32 %i, 1
+  %c = icmp ne i32 %i.next, %n
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/different-type-ivs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/different-type-ivs.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/different-type-ivs.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/different-type-ivs.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt < %s -loop-reduce -disable-output
+; Test to make sure that loop-reduce never crashes on IVs
+; with different types but identical strides.
+
+define void @foo() {
+entry:
+	br label %no_exit
+no_exit:		; preds = %no_exit, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ]		; <i32> [#uses=3]
+	%indvar.upgrd.1 = trunc i32 %indvar to i16		; <i16> [#uses=1]
+	%X.0.0 = mul i16 %indvar.upgrd.1, 1234		; <i16> [#uses=1]
+	%tmp. = mul i32 %indvar, 1234		; <i32> [#uses=1]
+	%tmp.5 = sext i16 %X.0.0 to i32		; <i32> [#uses=1]
+	%tmp.3 = call i32 (...) @bar( i32 %tmp.5, i32 %tmp. )		; <i32> [#uses=0]
+	%tmp.0 = call i1 @pred( )		; <i1> [#uses=1]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %tmp.0, label %return, label %no_exit
+return:		; preds = %no_exit
+	ret void
+}
+
+declare i1 @pred()
+
+declare i32 @bar(...)
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/dominate-assert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/dominate-assert.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/dominate-assert.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/dominate-assert.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,113 @@
+; RUN: opt -loop-reduce < %s
+; we used to crash on this one
+
+declare i8* @_Znwm()
+declare i32 @__gxx_personality_v0(...)
+declare void @g()
+define void @f() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+bb0:
+  br label %bb1
+bb1:
+  %v0 = phi i64 [ 0, %bb0 ], [ %v1, %bb1 ]
+  %v1 = add nsw i64 %v0, 1
+  br i1 undef, label %bb2, label %bb1
+bb2:
+  %v2 = icmp eq i64 %v0, 0
+  br i1 %v2, label %bb6, label %bb3
+bb3:
+  %v3 = invoke noalias i8* @_Znwm()
+          to label %bb5 unwind label %bb4
+bb4:
+  %v4 = landingpad { i8*, i32 }
+          cleanup
+  br label %bb9
+bb5:
+  %v5 = bitcast i8* %v3 to i32**
+  %add.ptr.i = getelementptr inbounds i32*, i32** %v5, i64 %v0
+  br label %bb6
+bb6:
+  %v6 = phi i32** [ null, %bb2 ], [ %add.ptr.i, %bb5 ]
+  invoke void @g()
+          to label %bb7 unwind label %bb8
+bb7:
+  unreachable
+bb8:
+  %v7 = landingpad { i8*, i32 }
+          cleanup
+  br label %bb9
+bb9:
+  resume { i8*, i32 } zeroinitializer
+}
+
+
+define void @h() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+bb1:
+  invoke void @g() optsize
+          to label %bb2 unwind label %bb5
+bb2:
+  %arrayctor.cur = phi i8* [ undef, %bb1 ], [ %arrayctor.next, %bb3 ]
+  invoke void @g() optsize
+          to label %bb3 unwind label %bb6
+bb3:
+  %arrayctor.next = getelementptr inbounds i8, i8* %arrayctor.cur, i64 1
+  br label %bb2
+bb4:
+  ret void
+bb5:
+  %tmp = landingpad { i8*, i32 }
+          cleanup
+  invoke void @g() optsize
+          to label %bb4 unwind label %bb7
+bb6:
+  %tmp1 = landingpad { i8*, i32 }
+          cleanup
+  %arraydestroy.isempty = icmp eq i8* undef, %arrayctor.cur
+  ret void
+bb7:
+  %lpad.nonloopexit = landingpad { i8*, i32 }
+          catch i8* null
+  ret void
+}
+
+; PR17425
+define void @i() {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %c.0 = phi i16* [ undef, %entry ], [ %incdec.ptr, %while.cond ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %c.0, i64 1
+  br i1 undef, label %while.cond1, label %while.cond
+
+while.cond1:                                      ; preds = %while.cond1, %while.cond
+  %c.1 = phi i16* [ %incdec.ptr5, %while.cond1 ], [ %c.0, %while.cond ]
+  %incdec.ptr5 = getelementptr inbounds i16, i16* %c.1, i64 1
+  br i1 undef, label %while.cond7, label %while.cond1
+
+while.cond7:                                      ; preds = %while.cond7, %while.cond1
+  %0 = phi i16* [ %incdec.ptr10, %while.cond7 ], [ %c.1, %while.cond1 ]
+  %incdec.ptr10 = getelementptr inbounds i16, i16* %0, i64 1
+  br i1 undef, label %while.cond12.preheader, label %while.cond7
+
+while.cond12.preheader:                           ; preds = %while.cond7
+  br i1 undef, label %while.end16, label %while.body13.lr.ph
+
+while.body13:                                     ; preds = %if.else, %while.body13.lr.ph
+  %1 = phi i16* [ %2, %while.body13.lr.ph ], [ %incdec.ptr15, %if.else ]
+  br i1 undef, label %while.cond12.outer.loopexit, label %if.else
+
+while.cond12.outer.loopexit:                      ; preds = %while.body13
+  br i1 undef, label %while.end16, label %while.body13.lr.ph
+
+while.body13.lr.ph:                               ; preds = %while.cond12.outer.loopexit, %while.cond12.preheader
+  %2 = phi i16* [ %1, %while.cond12.outer.loopexit ], [ undef, %while.cond12.preheader ]
+  br label %while.body13
+
+if.else:                                          ; preds = %while.body13
+  %incdec.ptr15 = getelementptr inbounds i16, i16* %1, i64 1
+  %cmp = icmp eq i16* %incdec.ptr15, %0
+  br i1 %cmp, label %while.end16, label %while.body13
+
+while.end16:                                      ; preds = %if.else, %while.cond12.outer.loopexit, %while.cond12.preheader
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt < %s -loop-reduce -S | \
+; RUN:   not grep "bitcast i32 1 to i32"
+; END.
+; The icmp slt wants to use a value that is incremented one more than the dominant
+; IV.  Don't insert the 1 outside the loop, which would prevent folding it into the add.
+
+define void @test([700 x i32]* %nbeaux_.0__558, i32* %i_.16574) {
+then.0:
+	br label %no_exit.2
+no_exit.2:		; preds = %no_exit.2, %then.0
+	%indvar630 = phi i32 [ 0, %then.0 ], [ %indvar.next631, %no_exit.2 ]		; <i32> [#uses=4]
+	%gep.upgrd.1 = zext i32 %indvar630 to i64		; <i64> [#uses=1]
+	%tmp.38 = getelementptr [700 x i32], [700 x i32]* %nbeaux_.0__558, i32 0, i64 %gep.upgrd.1		; <i32*> [#uses=1]
+	store i32 0, i32* %tmp.38
+	%inc.2 = add i32 %indvar630, 2		; <i32> [#uses=2]
+	%tmp.34 = icmp slt i32 %inc.2, 701		; <i1> [#uses=1]
+	%indvar.next631 = add i32 %indvar630, 1		; <i32> [#uses=1]
+	br i1 %tmp.34, label %no_exit.2, label %loopexit.2.loopexit
+loopexit.2.loopexit:		; preds = %no_exit.2
+	store i32 %inc.2, i32* %i_.16574
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; Check that this test makes INDVAR and related stuff dead.
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+; CHECK: phi
+; CHECK: phi
+; CHECK-NOT: phi
+
+declare i1 @pred()
+
+define void @test1({ i32, i32 }* %P) {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ]		; <i32> [#uses=3]
+	%gep1 = getelementptr { i32, i32 }, { i32, i32 }* %P, i32 %INDVAR, i32 0		; <i32*> [#uses=1]
+	store i32 0, i32* %gep1
+	%gep2 = getelementptr { i32, i32 }, { i32, i32 }* %P, i32 %INDVAR, i32 1		; <i32*> [#uses=1]
+	store i32 0, i32* %gep2
+	%INDVAR2 = add i32 %INDVAR, 1		; <i32> [#uses=1]
+	%cond = call i1 @pred( )		; <i1> [#uses=1]
+	br i1 %cond, label %Loop, label %Out
+Out:		; preds = %Loop
+	ret void
+}
+
+define void @test2([2 x i32]* %P) {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ]		; <i32> [#uses=3]
+	%gep1 = getelementptr [2 x i32], [2 x i32]* %P, i32 %INDVAR, i64 0		; <i32*> [#uses=1]
+	store i32 0, i32* %gep1
+	%gep2 = getelementptr [2 x i32], [2 x i32]* %P, i32 %INDVAR, i64 1		; <i32*> [#uses=1]
+	store i32 0, i32* %gep2
+	%INDVAR2 = add i32 %INDVAR, 1		; <i32> [#uses=1]
+	%cond = call i1 @pred( )		; <i1> [#uses=1]
+	br i1 %cond, label %Loop, label %Out
+Out:		; preds = %Loop
+	ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; Don't reduce the byte access to P[i], at least not on targets that 
+; support an efficient 'mem[r1+r2]' addressing mode.
+
+; RUN: opt < %s -loop-reduce -disable-output
+
+
+declare i1 @pred(i32)
+
+define void @test(i8* %PTR) {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ]		; <i32> [#uses=2]
+	%STRRED = getelementptr i8, i8* %PTR, i32 %INDVAR		; <i8*> [#uses=1]
+	store i8 0, i8* %STRRED
+	%INDVAR2 = add i32 %INDVAR, 1		; <i32> [#uses=2]
+        ;; cannot eliminate indvar
+	%cond = call i1 @pred( i32 %INDVAR2 )		; <i1> [#uses=1]
+	br i1 %cond, label %Loop, label %Out
+Out:		; preds = %Loop
+	ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/dont_reverse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/dont_reverse.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/dont_reverse.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/dont_reverse.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt < %s -loop-reduce -S \
+; RUN:    | grep "icmp eq i2 %lsr.iv.next, %xmp4344"
+
+; Don't reverse the iteration if the rhs of the compare is defined
+; inside the loop.
+
+; Provide legal integer types.
+; Declare i2 as legal so that IVUsers will consider %indvar3451.
+target datalayout = "n2:8:16:32:64"
+
+define void @Fill_Buffer(i2* %p) nounwind {
+entry:
+	br label %bb8
+
+bb8:
+	%indvar34 = phi i32 [ 0, %entry ], [ %indvar.next35, %bb8 ]
+	%indvar3451 = trunc i32 %indvar34 to i2
+	%xmp4344 = load i2, i2* %p
+	%xmp104 = icmp eq i2 %indvar3451, %xmp4344
+	%indvar.next35 = add i32 %indvar34, 1
+	br i1 %xmp104, label %bb10, label %bb8
+
+bb10:
+	unreachable
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/ephemeral.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/ephemeral.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/ephemeral.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/ephemeral.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+; for (int i = 0; i < n; ++i) {
+;   use(i * 5 + 3);
+;   // i * a + b is ephemeral and shouldn't be promoted by LSR
+;   __builtin_assume(i * a + b >= 0);
+; }
+define void @ephemeral(i32 %a, i32 %b, i32 %n) {
+; CHECK-LABEL: @ephemeral(
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  ; Only i and i * 5 + 3 should be indvars, not i * a + b.
+; CHECK: phi i32
+; CHECK: phi i32
+; CHECK-NOT: phi i32
+  %inc = add nsw i32 %i, 1
+  %exitcond = icmp eq i32 %inc, %n
+
+  %0 = mul nsw i32 %i, 5
+  %1 = add nsw i32 %0, 3
+  call void @use(i32 %1)
+
+  %2 = mul nsw i32 %i, %a
+  %3 = add nsw i32 %2, %b
+  %4 = icmp sgt i32 %3, -1
+  call void @llvm.assume(i1 %4)
+
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+declare void @use(i32)
+
+declare void @llvm.assume(i1)

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; Make sure that the compare instruction occurs after the increment to avoid
+; having overlapping live ranges that result in copies.  We want the icmp
+; instruction immediately before the conditional branch.
+;
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+define void @foo(float* %D, i32 %E) {
+entry:
+	br label %no_exit
+no_exit:		; preds = %no_exit, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ]		; <i32> [#uses=1]
+	store volatile float 0.000000e+00, float* %D
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+; CHECK: icmp
+; CHECK-NEXT: br i1
+	%exitcond = icmp eq i32 %indvar.next, %E		; <i1> [#uses=1]
+	br i1 %exitcond, label %loopexit, label %no_exit
+loopexit:		; preds = %no_exit
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/funclet.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/funclet.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/funclet.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/funclet.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,245 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+declare i32 @_except_handler3(...)
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @external(i32*)
+declare void @reserve()
+
+define void @f() personality i32 (...)* @_except_handler3 {
+entry:
+  br label %throw
+
+throw:                                            ; preds = %throw, %entry
+  %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+  invoke void @reserve()
+          to label %throw unwind label %pad
+
+pad:                                              ; preds = %throw
+  %phi2 = phi i8* [ %tmp96, %throw ]
+  %cs = catchswitch within none [label %unreachable] unwind label %blah2
+
+unreachable:
+  catchpad within %cs []
+  unreachable
+
+blah2:
+  %cleanuppadi4.i.i.i = cleanuppad within none []
+  br label %loop_body
+
+loop_body:                                        ; preds = %iter, %blah2
+  %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %blah2 ]
+  %tmp100 = icmp eq i8* %tmp99, undef
+  br i1 %tmp100, label %unwind_out, label %iter
+
+iter:                                             ; preds = %loop_body
+  %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+  br i1 undef, label %unwind_out, label %loop_body
+
+unwind_out:                                       ; preds = %iter, %loop_body
+  cleanupret from %cleanuppadi4.i.i.i unwind to caller
+}
+
+; CHECK-LABEL: define void @f(
+; CHECK: cleanuppad within none []
+; CHECK-NEXT: ptrtoint i8* %phi2 to i32
+
+define void @g() personality i32 (...)* @_except_handler3 {
+entry:
+  br label %throw
+
+throw:                                            ; preds = %throw, %entry
+  %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+  invoke void @reserve()
+          to label %throw unwind label %pad
+
+pad:
+  %phi2 = phi i8* [ %tmp96, %throw ]
+  %cs = catchswitch within none [label %unreachable, label %blah] unwind to caller
+
+unreachable:
+  catchpad within %cs []
+  unreachable
+
+blah:
+  %catchpad = catchpad within %cs []
+  br label %loop_body
+
+unwind_out:
+  catchret from %catchpad to label %leave
+
+leave:
+  ret void
+
+loop_body:                                        ; preds = %iter, %blah
+  %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %blah ]
+  %tmp100 = icmp eq i8* %tmp99, undef
+  br i1 %tmp100, label %unwind_out, label %iter
+
+iter:                                             ; preds = %loop_body
+  %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+  br i1 undef, label %unwind_out, label %loop_body
+}
+
+; CHECK-LABEL: define void @g(
+; CHECK: blah:
+; CHECK-NEXT: catchpad within %cs []
+; CHECK-NEXT: ptrtoint i8* %phi2 to i32
+
+
+define void @h() personality i32 (...)* @_except_handler3 {
+entry:
+  br label %throw
+
+throw:                                            ; preds = %throw, %entry
+  %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+  invoke void @reserve()
+          to label %throw unwind label %pad
+
+pad:
+  %cs = catchswitch within none [label %unreachable, label %blug] unwind to caller
+
+unreachable:
+  catchpad within %cs []
+  unreachable
+
+blug:
+  %phi2 = phi i8* [ %tmp96, %pad ]
+  %catchpad = catchpad within %cs []
+  br label %loop_body
+
+unwind_out:
+  catchret from %catchpad to label %leave
+
+leave:
+  ret void
+
+loop_body:                                        ; preds = %iter, %blug
+  %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %blug ]
+  %tmp100 = icmp eq i8* %tmp99, undef
+  br i1 %tmp100, label %unwind_out, label %iter
+
+iter:                                             ; preds = %loop_body
+  %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+  br i1 undef, label %unwind_out, label %loop_body
+}
+
+; CHECK-LABEL: define void @h(
+; CHECK: blug:
+; CHECK: catchpad within %cs []
+; CHECK-NEXT: ptrtoint i8* %phi2 to i32
+
+define void @i() personality i32 (...)* @_except_handler3 {
+entry:
+  br label %throw
+
+throw:                                            ; preds = %throw, %entry
+  %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+  invoke void @reserve()
+          to label %throw unwind label %catchpad
+
+catchpad:                                              ; preds = %throw
+  %phi2 = phi i8* [ %tmp96, %throw ]
+  %cs = catchswitch within none [label %cp_body] unwind label %cleanuppad
+
+cp_body:
+  catchpad within %cs []
+  br label %loop_head
+
+cleanuppad:
+  cleanuppad within none []
+  br label %loop_head
+
+loop_head:
+  br label %loop_body
+
+loop_body:                                        ; preds = %iter, %loop_head
+  %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %loop_head ]
+  %tmp100 = icmp eq i8* %tmp99, undef
+  br i1 %tmp100, label %unwind_out, label %iter
+
+iter:                                             ; preds = %loop_body
+  %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+  br i1 undef, label %unwind_out, label %loop_body
+
+unwind_out:                                       ; preds = %iter, %loop_body
+  unreachable
+}
+
+; CHECK-LABEL: define void @i(
+; CHECK: ptrtoint i8* %phi2 to i32
+
+define void @test1(i32* %b, i32* %c) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %d.0 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.inc ]
+  invoke void @external(i32* %d.0)
+          to label %for.inc unwind label %catch.dispatch
+
+for.inc:                                          ; preds = %for.cond
+  %incdec.ptr = getelementptr inbounds i32, i32* %d.0, i32 1
+  br label %for.cond
+
+catch.dispatch:                                   ; preds = %for.cond
+  %cs = catchswitch within none [label %catch] unwind label %catch.dispatch.2
+
+catch:                                            ; preds = %catch.dispatch
+  %0 = catchpad within %cs [i8* null, i32 64, i8* null]
+  catchret from %0 to label %try.cont
+
+try.cont:                                         ; preds = %catch
+  invoke void @external(i32* %c)
+          to label %try.cont.7 unwind label %catch.dispatch.2
+
+catch.dispatch.2:                                 ; preds = %try.cont, %catch.dispatch
+  %e.0 = phi i32* [ %c, %try.cont ], [ %b, %catch.dispatch ]
+  %cs2 = catchswitch within none [label %catch.4] unwind to caller
+
+catch.4:                                          ; preds = %catch.dispatch.2
+  catchpad within %cs2 [i8* null, i32 64, i8* null]
+  unreachable
+
+try.cont.7:                                       ; preds = %try.cont
+  ret void
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: for.cond:
+; CHECK:   %d.0 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.inc ]
+
+; CHECK: catch.dispatch.2:
+; CHECK: %e.0 = phi i32* [ %c, %try.cont ], [ %b, %catch.dispatch ]
+
+define i32 @test2() personality i32 (...)* @_except_handler3 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %phi = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+  invoke void @reserve()
+          to label %for.inc unwind label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %for.body
+  %tmp18 = catchswitch within none [label %catch.handler] unwind to caller
+
+catch.handler:                                    ; preds = %catch.dispatch
+  %phi.lcssa = phi i32 [ %phi, %catch.dispatch ]
+  %tmp19 = catchpad within %tmp18 [i8* null]
+  catchret from %tmp19 to label %done
+
+done:
+  ret i32 %phi.lcssa
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i32 %phi, 1
+  br label %for.body
+}
+
+; CHECK-LABEL: define i32 @test2(
+; CHECK:      %phi.lcssa = phi i32 [ %phi, %catch.dispatch ]
+; CHECK-NEXT: catchpad within

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt < %s -loop-reduce -verify
+target triple = "x86_64-apple-darwin10"
+
+define void @myquicksort(i8* %a) nounwind ssp {
+entry:
+  br i1 undef, label %loop1, label %return
+
+loop1:                                            ; preds = %loop2.exit, %entry
+  %indvar419 = phi i64 [ %indvar.next420, %loop2.exit ], [ 0, %entry ]
+  %tmp474 = shl i64 %indvar419, 2
+  %tmp484 = add i64 %tmp474, 4  ; 4 * outer induction variable + 4
+  br label %loop2
+
+loop2:                                            ; preds = %loop1, %loop2.backedge
+  %indvar414 = phi i64 [ %indvar.next415, %loop2.backedge ], [ 0, %loop1 ]
+  %tmp473 = mul i64 %indvar414, -4
+  %tmp485 = add i64 %tmp484, %tmp473  ; outer term minus 4 * inner induction variable
+  %storemerge4 = getelementptr i8, i8* %a, i64 %tmp485
+  %0 = icmp ugt i8* %storemerge4, %a  ; result has no uses in this function
+  br i1 false, label %loop2.exit, label %loop2.backedge  ; constant-false condition: always goes to %loop2.backedge
+
+loop2.backedge:                                   ; preds = %loop2
+  %indvar.next415 = add i64 %indvar414, 1
+  br label %loop2
+
+loop2.exit:                                       ; preds = %loop2
+  %indvar.next420 = add i64 %indvar419, 1
+  br i1 undef, label %loop1, label %return
+
+return:                                           ; preds = %loop2.exit, %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/illegal-addr-modes.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,122 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; These are regression tests for
+;  https://bugs.llvm.org/show_bug.cgi?id=34106
+;    "ARMTargetLowering::isLegalAddressingMode can accept incorrect
+;    addressing modes for Thumb1 target"
+;  https://reviews.llvm.org/D34583
+;    "[LSR] Narrow search space by filtering non-optimal formulae with the
+;    same ScaledReg and Scale."
+;
+; Due to a bug in ARMTargetLowering::isLegalAddressingMode, LSR concluded that
+; 4*reg({0,+,-1}) and -4*reg({0,+,-1}) had the same cost for the Thumb1 target.
+; Another issue was that LSR concluded that -1*reg was free for the Thumb1 target.
+
+; Test case 01: -1*reg is not free for the Thumb1 target.
+; 
+; CHECK-LABEL: @negativeOneCase
+; CHECK-NOT: mul
+; CHECK: ret i8
+define i8* @negativeOneCase(i8* returned %a, i8* nocapture readonly %b, i32 %n) nounwind {
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %a, i32 -1  ; start one byte before %a; the loop below increments before loading
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %p.0 = phi i8* [ %add.ptr, %entry ], [ %incdec.ptr, %while.cond ]
+  %incdec.ptr = getelementptr inbounds i8, i8* %p.0, i32 1
+  %0 = load i8, i8* %incdec.ptr, align 1
+  %cmp = icmp eq i8 %0, 0
+  br i1 %cmp, label %while.cond2.preheader, label %while.cond  ; leave the scan loop at the first zero byte
+
+while.cond2.preheader:                            ; preds = %while.cond
+  br label %while.cond2
+
+while.cond2:                                      ; preds = %while.cond2.preheader, %while.body5
+  %b.addr.0 = phi i8* [ %incdec.ptr6, %while.body5 ], [ %b, %while.cond2.preheader ]
+  %n.addr.0 = phi i32 [ %dec, %while.body5 ], [ %n, %while.cond2.preheader ]
+  %p.1 = phi i8* [ %incdec.ptr7, %while.body5 ], [ %incdec.ptr, %while.cond2.preheader ]
+  %cmp3 = icmp eq i32 %n.addr.0, 0
+  br i1 %cmp3, label %while.end8, label %while.body5  ; stop once the remaining count reaches zero
+
+while.body5:                                      ; preds = %while.cond2
+  %dec = add i32 %n.addr.0, -1
+  %incdec.ptr6 = getelementptr inbounds i8, i8* %b.addr.0, i32 1
+  %1 = load i8, i8* %b.addr.0, align 1
+  %incdec.ptr7 = getelementptr inbounds i8, i8* %p.1, i32 1
+  store i8 %1, i8* %p.1, align 1  ; copy one byte from the %b cursor to the destination cursor
+  br label %while.cond2
+
+while.end8:                                       ; preds = %while.cond2
+  %scevgep = getelementptr i8, i8* %incdec.ptr, i32 %n  ; address of the zero byte found by the scan loop, plus %n
+  store i8 0, i8* %scevgep, align 1
+  ret i8* %a
+}
+
+; Test case 02: 4*reg({0,+,-1}) and -4*reg({0,+,-1}) are not supported for
+;               the Thumb1 target.
+; 
+; CHECK-LABEL: @negativeFourCase
+; CHECK-NOT: mul
+; CHECK: ret void
+define void @negativeFourCase(i8* %ptr1, i32* %ptr2) nounwind {
+entry:
+  br label %for.cond6.preheader.us.i.i
+
+for.cond6.preheader.us.i.i:                       ; preds = %if.end48.us.i.i, %entry
+  %addr.0108.us.i.i = phi i8* [ %scevgep.i.i, %if.end48.us.i.i ], [ %ptr1, %entry ]
+  %inc49.us.i.i = phi i32 [ %inc50.us.i.i, %if.end48.us.i.i ], [ 0, %entry ]
+  %c1.0104.us.i.i = phi i32* [ %c0.0103.us.i.i, %if.end48.us.i.i ], [ %ptr2, %entry ]  ; %c1 and %c0 exchange values on each outer iteration
+  %c0.0103.us.i.i = phi i32* [ %c1.0104.us.i.i, %if.end48.us.i.i ], [ %ptr2, %entry ]  ; (both start as %ptr2)
+  br label %for.body8.us.i.i
+
+if.end48.us.i.i:                                  ; preds = %for.inc.us.i.i
+  %scevgep.i.i = getelementptr i8, i8* %addr.0108.us.i.i, i32 256  ; advance the byte pointer by 256 per outer iteration
+  %inc50.us.i.i = add nuw nsw i32 %inc49.us.i.i, 1
+  %exitcond110.i.i = icmp eq i32 %inc50.us.i.i, 256
+  br i1 %exitcond110.i.i, label %exit.i, label %for.cond6.preheader.us.i.i  ; outer loop ends when its counter reaches 256
+
+for.body8.us.i.i:                                 ; preds = %for.inc.us.i.i, %for.cond6.preheader.us.i.i
+  %addr.198.us.i.i = phi i8* [ %addr.0108.us.i.i, %for.cond6.preheader.us.i.i ], [ %incdec.ptr.us.i.i, %for.inc.us.i.i ]
+  %inc.196.us.i.i = phi i32 [ 0, %for.cond6.preheader.us.i.i ], [ %inc.2.us.i.i, %for.inc.us.i.i ]
+  %c.093.us.i.i = phi i32 [ 0, %for.cond6.preheader.us.i.i ], [ %inc43.us.i.i, %for.inc.us.i.i ]
+  %incdec.ptr.us.i.i = getelementptr inbounds i8, i8* %addr.198.us.i.i, i32 1
+  %0 = load i8, i8* %addr.198.us.i.i, align 1
+  %cmp9.us.i.i = icmp eq i8 %0, -1
+  br i1 %cmp9.us.i.i, label %if.end37.us.i.i, label %if.else.us.i.i  ; a byte equal to -1 skips the three i32 loads
+
+if.else.us.i.i:                                   ; preds = %for.body8.us.i.i
+  %add12.us.i.i = add nuw nsw i32 %c.093.us.i.i, 1
+  %arrayidx13.us.i.i = getelementptr inbounds i32, i32* %c1.0104.us.i.i, i32 %add12.us.i.i
+  %1 = load i32, i32* %arrayidx13.us.i.i, align 4  ; element at index c + 1; the loaded value has no uses
+  %arrayidx16.us.i.i = getelementptr inbounds i32, i32* %c1.0104.us.i.i, i32 %c.093.us.i.i
+  %2 = load i32, i32* %arrayidx16.us.i.i, align 4  ; element at index c; the loaded value has no uses
+  %sub19.us.i.i = add nsw i32 %c.093.us.i.i, -1
+  %arrayidx20.us.i.i = getelementptr inbounds i32, i32* %c1.0104.us.i.i, i32 %sub19.us.i.i
+  %3 = load i32, i32* %arrayidx20.us.i.i, align 4  ; element at index c - 1; feeds the phi in %if.end37.us.i.i
+  br label %if.end37.us.i.i
+
+if.end37.us.i.i:                                  ; preds = %if.else.us.i.i, %for.body8.us.i.i
+  %4 = phi i32 [ %3, %if.else.us.i.i ], [ 0, %for.body8.us.i.i ]
+  %arrayidx36.us.i.i = getelementptr inbounds i32, i32* %c0.0103.us.i.i, i32 %c.093.us.i.i
+  store i32 %4, i32* %arrayidx36.us.i.i, align 4
+  %inc.us.i.i = add nsw i32 %inc.196.us.i.i, 1
+  %cmp38.us.i.i = icmp sgt i32 %inc.196.us.i.i, 6
+  br i1 %cmp38.us.i.i, label %if.then40.us.i.i, label %for.inc.us.i.i  ; once the secondary counter exceeds 6 it is reset to 0 below
+
+if.then40.us.i.i:                                 ; preds = %if.end37.us.i.i
+  br label %for.inc.us.i.i
+
+for.inc.us.i.i:                                   ; preds = %if.then40.us.i.i, %if.end37.us.i.i
+  %inc.2.us.i.i = phi i32 [ 0, %if.then40.us.i.i ], [ %inc.us.i.i, %if.end37.us.i.i ]
+  %inc43.us.i.i = add nuw nsw i32 %c.093.us.i.i, 1
+  %exitcond.i.i = icmp eq i32 %inc43.us.i.i, 256
+  br i1 %exitcond.i.i, label %if.end48.us.i.i, label %for.body8.us.i.i  ; inner loop ends when its index reaches 256
+
+exit.i:                                           ; preds = %if.end48.us.i.i
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/invariant_value_first.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/invariant_value_first.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/invariant_value_first.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/invariant_value_first.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; Check that the index of 'P[outer]' is pulled out of the loop.
+; RUN: opt < %s -loop-reduce -S | \
+; RUN:   not grep "getelementptr.*%outer.*%INDVAR"
+
+target datalayout = "e-p:32:32:32-n8:16:32"
+declare i1 @pred()
+
+declare i32 @foo()
+
+define void @test([10000 x i32]* %P) {
+; <label>:0 - unnamed entry block, referred to below as %0
+	%outer = call i32 @foo( )		; <i32> [#uses=1], computed once before %Loop
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ]		; <i32> [#uses=2]
+	%STRRED = getelementptr [10000 x i32], [10000 x i32]* %P, i32 %outer, i32 %INDVAR		; <i32*> [#uses=1], the RUN line rejects output that still has a GEP naming both %outer and %INDVAR
+	store i32 0, i32* %STRRED
+	%INDVAR2 = add i32 %INDVAR, 1		; <i32> [#uses=1]
+	%cond = call i1 @pred( )		; <i1> [#uses=1], loop continues while the external @pred returns true
+	br i1 %cond, label %Loop, label %Out
+Out:		; preds = %Loop
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; Check that the index of 'P[outer]' is pulled out of the loop.
+; RUN: opt < %s -loop-reduce -S | \
+; RUN:   not grep "getelementptr.*%outer.*%INDVAR"
+
+target datalayout = "e-p:32:32:32-n32"
+declare i1 @pred()
+
+define void @test([10000 x i32]* %P, i32 %outer) {
+; <label>:0 - unnamed entry block, referred to below as %0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ]		; <i32> [#uses=2]
+	%STRRED = getelementptr [10000 x i32], [10000 x i32]* %P, i32 %outer, i32 %INDVAR		; <i32*> [#uses=1], %outer is a function argument here; the RUN line rejects output that still has a GEP naming both %outer and %INDVAR
+	store i32 0, i32* %STRRED
+	%INDVAR2 = add i32 %INDVAR, 1		; <i32> [#uses=1]
+	%cond = call i1 @pred( )		; <i1> [#uses=1], loop continues while the external @pred returns true
+	br i1 %cond, label %Loop, label %Out
+Out:		; preds = %Loop
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/ivchain.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/ivchain.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/ivchain.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/ivchain.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,48 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; RUN: opt -passes='require<scalar-evolution>,require<targetir>,loop(strength-reduce)' < %s -S | FileCheck %s
+;
+; PR11782: bad cast to AddRecExpr.
+; A sign extend feeds an IVUser and cannot be hoisted into the AddRec.
+; CollectIVChains should bail out in this case.
+
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+%struct = type { i8*, i8*, i16, i64, i16, i16, i16, i64, i64, i16, i8*, i64, i64, i64 }
+
+; CHECK-LABEL: @test(
+; CHECK: for.body:
+; CHECK: lsr.iv = phi %struct
+; CHECK: br
+define i32 @test(i8* %h, i32 %more) nounwind uwtable {
+entry:
+  br i1 undef, label %land.end238, label %return
+
+land.end238:                                      ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %sw.epilog, %land.end238
+  %fbh.0 = phi %struct* [ undef, %land.end238 ], [ %incdec.ptr, %sw.epilog ]
+  %column_n.0 = phi i16 [ 0, %land.end238 ], [ %inc601, %sw.epilog ]
+  %conv250 = sext i16 %column_n.0 to i32  ; sign extend of the i16 counter (presumably the sext the file header refers to)
+  %add257 = add nsw i32 %conv250, 1
+  %conv258 = trunc i32 %add257 to i16
+  %cmp263 = icmp ult i16 undef, 2  ; result has no uses in this function
+  br label %if.end388
+
+if.end388:                                        ; preds = %for.body
+  %ColLength = getelementptr inbounds %struct, %struct* %fbh.0, i64 0, i32 7
+  %call405 = call signext i16 @SQLColAttribute(i8* undef, i16 zeroext %conv258, i16 zeroext 1003, i8* null, i16 signext 0, i16* null, i64* %ColLength) nounwind
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %if.end388
+  %inc601 = add i16 %column_n.0, 1
+  %incdec.ptr = getelementptr inbounds %struct, %struct* %fbh.0, i64 1  ; step the %struct pointer by one element per iteration
+  br label %for.body  ; unconditional back edge: this loop has no exit
+
+return:                                           ; preds = %entry
+  ret i32 1
+}
+
+declare signext i16 @SQLColAttribute(i8*, i16 zeroext, i16 zeroext, i8*, i16 signext, i16*, i64*)

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/lsr-comp-time.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/lsr-comp-time.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/lsr-comp-time.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/lsr-comp-time.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,1338 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; RUN: opt -loop-reduce -lsr-complexity-limit=2147483647 -S < %s | FileCheck %s
+
+; Compile time for this test should be under one second (no hang).
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind readnone uwtable
+define dso_local i32 @foo(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6) local_unnamed_addr #3 {
+; CHECK-LABEL: @foo(
+; CHECK:       bb33:
+; CHECK:       lsr.iv
+; CHECK:       bb58:
+; CHECK:       lsr.iv
+; CHECK:       bb81:
+; CHECK:       lsr.iv
+; CHECK:       bb104:
+; CHECK:       lsr.iv
+; CHECK:       bb127:
+; CHECK:       lsr.iv
+; CHECK:       bb150:
+; CHECK:       lsr.iv
+; CHECK:       bb173:
+; CHECK:       lsr.iv
+; CHECK:       bb196:
+; CHECK:       lsr.iv
+; CHECK:       bb219:
+; CHECK:       lsr.iv
+; CHECK:       bb242:
+; CHECK:       lsr.iv
+; CHECK:       bb265:
+; CHECK:       lsr.iv
+; CHECK:       bb288:
+; CHECK:       lsr.iv
+; CHECK:       bb311:
+; CHECK:       lsr.iv
+; CHECK:       bb340:
+; CHECK:       lsr.iv
+; CHECK:       bb403:
+; CHECK:       lsr.iv
+; CHECK:       bb433:
+; CHECK:       lsr.iv
+; CHECK:       bb567:
+; CHECK:       lsr.iv
+; CHECK:       bb611:
+; CHECK:       lsr.iv
+; CHECK:       bb655:
+; CHECK:       lsr.iv
+; CHECK:       bb699:
+; CHECK:       lsr.iv
+; CHECK:       bb743:
+; CHECK:       lsr.iv
+; CHECK:       bb787:
+; CHECK:       lsr.iv
+; CHECK:       bb831:
+; CHECK:       lsr.iv
+; CHECK:       bb875:
+; CHECK:       lsr.iv
+; CHECK:       bb919:
+; CHECK:       lsr.iv
+; CHECK:       bb963:
+; CHECK:       lsr.iv
+; CHECK:       bb1007:
+; CHECK:       lsr.iv
+; CHECK:    ret
+;
+bb:
+  %tmp = alloca [100 x i32], align 16
+  %tmp7 = alloca [100 x i32], align 16
+  %tmp8 = alloca [100 x i32], align 16
+  %tmp9 = alloca [100 x [100 x i32]], align 16
+  %tmp10 = alloca [100 x i32], align 16
+  %tmp11 = alloca [100 x [100 x i32]], align 16
+  %tmp12 = alloca [100 x i32], align 16
+  %tmp13 = alloca [100 x i32], align 16
+  %tmp14 = alloca [100 x [100 x i32]], align 16
+  %tmp15 = alloca [100 x i32], align 16
+  %tmp16 = alloca [100 x [100 x i32]], align 16
+  %tmp17 = alloca [100 x [100 x i32]], align 16
+  %tmp18 = alloca [100 x [100 x i32]], align 16
+  %tmp19 = bitcast [100 x i32]* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 400, i8* nonnull %tmp19) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp19, i8 0, i64 400, i1 false)
+  %tmp20 = bitcast [100 x i32]* %tmp7 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 400, i8* nonnull %tmp20) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp20, i8 0, i64 400, i1 false)
+  %tmp21 = bitcast [100 x i32]* %tmp8 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 400, i8* nonnull %tmp21) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp21, i8 0, i64 400, i1 false)
+  %tmp22 = bitcast [100 x [100 x i32]]* %tmp9 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 40000, i8* nonnull %tmp22) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp22, i8 0, i64 40000, i1 false)
+  %tmp23 = bitcast [100 x i32]* %tmp10 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 400, i8* nonnull %tmp23) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp23, i8 0, i64 400, i1 false)
+  %tmp24 = bitcast [100 x [100 x i32]]* %tmp11 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 40000, i8* nonnull %tmp24) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp24, i8 0, i64 40000, i1 false)
+  %tmp25 = bitcast [100 x i32]* %tmp12 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 400, i8* nonnull %tmp25) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp25, i8 0, i64 400, i1 false)
+  %tmp26 = bitcast [100 x i32]* %tmp13 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 400, i8* nonnull %tmp26) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp26, i8 0, i64 400, i1 false)
+  %tmp27 = bitcast [100 x [100 x i32]]* %tmp14 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 40000, i8* nonnull %tmp27) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp27, i8 0, i64 40000, i1 false)
+  %tmp28 = bitcast [100 x i32]* %tmp15 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 400, i8* nonnull %tmp28) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp28, i8 0, i64 400, i1 false)
+  %tmp29 = bitcast [100 x [100 x i32]]* %tmp16 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 40000, i8* nonnull %tmp29) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp29, i8 0, i64 40000, i1 false)
+  %tmp30 = bitcast [100 x [100 x i32]]* %tmp17 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 40000, i8* nonnull %tmp30) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp30, i8 0, i64 40000, i1 false)
+  %tmp31 = bitcast [100 x [100 x i32]]* %tmp18 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 40000, i8* nonnull %tmp31) #4
+  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %tmp31, i8 0, i64 40000, i1 false)
+  %tmp32 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 3
+  br label %bb33
+
+bb33:                                             ; preds = %bb33, %bb
+  %tmp34 = phi i64 [ 0, %bb ], [ %tmp54, %bb33 ]
+  %tmp35 = trunc i64 %tmp34 to i32
+  %tmp36 = add i32 %tmp35, 48
+  %tmp37 = urem i32 %tmp36, 101
+  %tmp38 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp34
+  store i32 %tmp37, i32* %tmp38, align 16
+  %tmp39 = or i64 %tmp34, 1
+  %tmp40 = trunc i64 %tmp39 to i32
+  %tmp41 = sub i32 48, %tmp40
+  %tmp42 = urem i32 %tmp41, 101
+  %tmp43 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp39
+  store i32 %tmp42, i32* %tmp43, align 4
+  %tmp44 = or i64 %tmp34, 2
+  %tmp45 = trunc i64 %tmp44 to i32
+  %tmp46 = add i32 %tmp45, 48
+  %tmp47 = urem i32 %tmp46, 101
+  %tmp48 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp44
+  store i32 %tmp47, i32* %tmp48, align 8
+  %tmp49 = or i64 %tmp34, 3
+  %tmp50 = trunc i64 %tmp49 to i32
+  %tmp51 = sub i32 48, %tmp50
+  %tmp52 = urem i32 %tmp51, 101
+  %tmp53 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp49
+  store i32 %tmp52, i32* %tmp53, align 4
+  %tmp54 = add nuw nsw i64 %tmp34, 4
+  %tmp55 = icmp eq i64 %tmp54, 100
+  br i1 %tmp55, label %bb56, label %bb33
+
+bb56:                                             ; preds = %bb33
+  %tmp57 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 88, i64 91
+  br label %bb58
+
+bb58:                                             ; preds = %bb58, %bb56
+  %tmp59 = phi i64 [ 0, %bb56 ], [ %tmp79, %bb58 ]
+  %tmp60 = trunc i64 %tmp59 to i32
+  %tmp61 = add i32 %tmp60, 83
+  %tmp62 = urem i32 %tmp61, 101
+  %tmp63 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp59
+  store i32 %tmp62, i32* %tmp63, align 16
+  %tmp64 = or i64 %tmp59, 1
+  %tmp65 = trunc i64 %tmp64 to i32
+  %tmp66 = sub i32 83, %tmp65
+  %tmp67 = urem i32 %tmp66, 101
+  %tmp68 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp64
+  store i32 %tmp67, i32* %tmp68, align 4
+  %tmp69 = or i64 %tmp59, 2
+  %tmp70 = trunc i64 %tmp69 to i32
+  %tmp71 = add i32 %tmp70, 83
+  %tmp72 = urem i32 %tmp71, 101
+  %tmp73 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp69
+  store i32 %tmp72, i32* %tmp73, align 8
+  %tmp74 = or i64 %tmp59, 3
+  %tmp75 = trunc i64 %tmp74 to i32
+  %tmp76 = sub i32 83, %tmp75
+  %tmp77 = urem i32 %tmp76, 101
+  %tmp78 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp74
+  store i32 %tmp77, i32* %tmp78, align 4
+  %tmp79 = add nuw nsw i64 %tmp59, 4
+  %tmp80 = icmp eq i64 %tmp79, 100
+  br i1 %tmp80, label %bb81, label %bb58
+
+bb81:                                             ; preds = %bb81, %bb58
+  %tmp82 = phi i64 [ %tmp102, %bb81 ], [ 0, %bb58 ]
+  %tmp83 = trunc i64 %tmp82 to i32
+  %tmp84 = add i32 %tmp83, 15
+  %tmp85 = urem i32 %tmp84, 101
+  %tmp86 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp82
+  store i32 %tmp85, i32* %tmp86, align 16
+  %tmp87 = or i64 %tmp82, 1
+  %tmp88 = trunc i64 %tmp87 to i32
+  %tmp89 = sub i32 15, %tmp88
+  %tmp90 = urem i32 %tmp89, 101
+  %tmp91 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp87
+  store i32 %tmp90, i32* %tmp91, align 4
+  %tmp92 = or i64 %tmp82, 2
+  %tmp93 = trunc i64 %tmp92 to i32
+  %tmp94 = add i32 %tmp93, 15
+  %tmp95 = urem i32 %tmp94, 101
+  %tmp96 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp92
+  store i32 %tmp95, i32* %tmp96, align 8
+  %tmp97 = or i64 %tmp82, 3
+  %tmp98 = trunc i64 %tmp97 to i32
+  %tmp99 = sub i32 15, %tmp98
+  %tmp100 = urem i32 %tmp99, 101
+  %tmp101 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp97
+  store i32 %tmp100, i32* %tmp101, align 4
+  %tmp102 = add nuw nsw i64 %tmp82, 4
+  %tmp103 = icmp eq i64 %tmp102, 100
+  br i1 %tmp103, label %bb104, label %bb81
+
+bb104:                                            ; preds = %bb104, %bb81
+  %tmp105 = phi i64 [ %tmp125, %bb104 ], [ 0, %bb81 ]
+  %tmp106 = trunc i64 %tmp105 to i32
+  %tmp107 = add i32 %tmp106, 60
+  %tmp108 = urem i32 %tmp107, 101
+  %tmp109 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 0, i64 %tmp105
+  store i32 %tmp108, i32* %tmp109, align 16
+  %tmp110 = or i64 %tmp105, 1
+  %tmp111 = trunc i64 %tmp110 to i32
+  %tmp112 = sub i32 60, %tmp111
+  %tmp113 = urem i32 %tmp112, 101
+  %tmp114 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 0, i64 %tmp110
+  store i32 %tmp113, i32* %tmp114, align 4
+  %tmp115 = or i64 %tmp105, 2
+  %tmp116 = trunc i64 %tmp115 to i32
+  %tmp117 = add i32 %tmp116, 60
+  %tmp118 = urem i32 %tmp117, 101
+  %tmp119 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 0, i64 %tmp115
+  store i32 %tmp118, i32* %tmp119, align 8
+  %tmp120 = or i64 %tmp105, 3
+  %tmp121 = trunc i64 %tmp120 to i32
+  %tmp122 = sub i32 60, %tmp121
+  %tmp123 = urem i32 %tmp122, 101
+  %tmp124 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 0, i64 %tmp120
+  store i32 %tmp123, i32* %tmp124, align 4
+  %tmp125 = add nuw nsw i64 %tmp105, 4
+  %tmp126 = icmp eq i64 %tmp125, 10000
+  br i1 %tmp126, label %bb127, label %bb104
+
+bb127:                                            ; preds = %bb127, %bb104
+  %tmp128 = phi i64 [ %tmp148, %bb127 ], [ 0, %bb104 ]
+  %tmp129 = trunc i64 %tmp128 to i32
+  %tmp130 = add i32 %tmp129, 87
+  %tmp131 = urem i32 %tmp130, 101
+  %tmp132 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp128
+  store i32 %tmp131, i32* %tmp132, align 16
+  %tmp133 = or i64 %tmp128, 1
+  %tmp134 = trunc i64 %tmp133 to i32
+  %tmp135 = sub i32 87, %tmp134
+  %tmp136 = urem i32 %tmp135, 101
+  %tmp137 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp133
+  store i32 %tmp136, i32* %tmp137, align 4
+  %tmp138 = or i64 %tmp128, 2
+  %tmp139 = trunc i64 %tmp138 to i32
+  %tmp140 = add i32 %tmp139, 87
+  %tmp141 = urem i32 %tmp140, 101
+  %tmp142 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp138
+  store i32 %tmp141, i32* %tmp142, align 8
+  %tmp143 = or i64 %tmp128, 3
+  %tmp144 = trunc i64 %tmp143 to i32
+  %tmp145 = sub i32 87, %tmp144
+  %tmp146 = urem i32 %tmp145, 101
+  %tmp147 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp143
+  store i32 %tmp146, i32* %tmp147, align 4
+  %tmp148 = add nuw nsw i64 %tmp128, 4
+  %tmp149 = icmp eq i64 %tmp148, 100
+  br i1 %tmp149, label %bb150, label %bb127
+
+bb150:                                            ; preds = %bb150, %bb127
+  %tmp151 = phi i64 [ %tmp171, %bb150 ], [ 0, %bb127 ]
+  %tmp152 = trunc i64 %tmp151 to i32
+  %tmp153 = add i32 %tmp152, 36
+  %tmp154 = urem i32 %tmp153, 101
+  %tmp155 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 0, i64 %tmp151
+  store i32 %tmp154, i32* %tmp155, align 16
+  %tmp156 = or i64 %tmp151, 1
+  %tmp157 = trunc i64 %tmp156 to i32
+  %tmp158 = sub i32 36, %tmp157
+  %tmp159 = urem i32 %tmp158, 101
+  %tmp160 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 0, i64 %tmp156
+  store i32 %tmp159, i32* %tmp160, align 4
+  %tmp161 = or i64 %tmp151, 2
+  %tmp162 = trunc i64 %tmp161 to i32
+  %tmp163 = add i32 %tmp162, 36
+  %tmp164 = urem i32 %tmp163, 101
+  %tmp165 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 0, i64 %tmp161
+  store i32 %tmp164, i32* %tmp165, align 8
+  %tmp166 = or i64 %tmp151, 3
+  %tmp167 = trunc i64 %tmp166 to i32
+  %tmp168 = sub i32 36, %tmp167
+  %tmp169 = urem i32 %tmp168, 101
+  %tmp170 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 0, i64 %tmp166
+  store i32 %tmp169, i32* %tmp170, align 4
+  %tmp171 = add nuw nsw i64 %tmp151, 4
+  %tmp172 = icmp eq i64 %tmp171, 10000
+  br i1 %tmp172, label %bb173, label %bb150
+
+bb173:                                            ; preds = %bb173, %bb150
+  %tmp174 = phi i64 [ %tmp194, %bb173 ], [ 0, %bb150 ]
+  %tmp175 = trunc i64 %tmp174 to i32
+  %tmp176 = add i32 %tmp175, 27
+  %tmp177 = urem i32 %tmp176, 101
+  %tmp178 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 %tmp174
+  store i32 %tmp177, i32* %tmp178, align 16
+  %tmp179 = or i64 %tmp174, 1
+  %tmp180 = trunc i64 %tmp179 to i32
+  %tmp181 = sub i32 27, %tmp180
+  %tmp182 = urem i32 %tmp181, 101
+  %tmp183 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 %tmp179
+  store i32 %tmp182, i32* %tmp183, align 4
+  %tmp184 = or i64 %tmp174, 2
+  %tmp185 = trunc i64 %tmp184 to i32
+  %tmp186 = add i32 %tmp185, 27
+  %tmp187 = urem i32 %tmp186, 101
+  %tmp188 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 %tmp184
+  store i32 %tmp187, i32* %tmp188, align 8
+  %tmp189 = or i64 %tmp174, 3
+  %tmp190 = trunc i64 %tmp189 to i32
+  %tmp191 = sub i32 27, %tmp190
+  %tmp192 = urem i32 %tmp191, 101
+  %tmp193 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 %tmp189
+  store i32 %tmp192, i32* %tmp193, align 4
+  %tmp194 = add nuw nsw i64 %tmp174, 4
+  %tmp195 = icmp eq i64 %tmp194, 100
+  br i1 %tmp195, label %bb196, label %bb173
+
+bb196:                                            ; preds = %bb196, %bb173
+  %tmp197 = phi i64 [ %tmp217, %bb196 ], [ 0, %bb173 ]
+  %tmp198 = trunc i64 %tmp197 to i32
+  %tmp199 = add i32 %tmp198, 40
+  %tmp200 = urem i32 %tmp199, 101
+  %tmp201 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp197
+  store i32 %tmp200, i32* %tmp201, align 16
+  %tmp202 = or i64 %tmp197, 1
+  %tmp203 = trunc i64 %tmp202 to i32
+  %tmp204 = sub i32 40, %tmp203
+  %tmp205 = urem i32 %tmp204, 101
+  %tmp206 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp202
+  store i32 %tmp205, i32* %tmp206, align 4
+  %tmp207 = or i64 %tmp197, 2
+  %tmp208 = trunc i64 %tmp207 to i32
+  %tmp209 = add i32 %tmp208, 40
+  %tmp210 = urem i32 %tmp209, 101
+  %tmp211 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp207
+  store i32 %tmp210, i32* %tmp211, align 8
+  %tmp212 = or i64 %tmp197, 3
+  %tmp213 = trunc i64 %tmp212 to i32
+  %tmp214 = sub i32 40, %tmp213
+  %tmp215 = urem i32 %tmp214, 101
+  %tmp216 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp212
+  store i32 %tmp215, i32* %tmp216, align 4
+  %tmp217 = add nuw nsw i64 %tmp197, 4
+  %tmp218 = icmp eq i64 %tmp217, 100
+  br i1 %tmp218, label %bb219, label %bb196
+
+bb219:                                            ; preds = %bb219, %bb196
+  %tmp220 = phi i64 [ %tmp240, %bb219 ], [ 0, %bb196 ]
+  %tmp221 = trunc i64 %tmp220 to i32
+  %tmp222 = add i32 %tmp221, 84
+  %tmp223 = urem i32 %tmp222, 101
+  %tmp224 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 0, i64 %tmp220
+  store i32 %tmp223, i32* %tmp224, align 16
+  %tmp225 = or i64 %tmp220, 1
+  %tmp226 = trunc i64 %tmp225 to i32
+  %tmp227 = sub i32 84, %tmp226
+  %tmp228 = urem i32 %tmp227, 101
+  %tmp229 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 0, i64 %tmp225
+  store i32 %tmp228, i32* %tmp229, align 4
+  %tmp230 = or i64 %tmp220, 2
+  %tmp231 = trunc i64 %tmp230 to i32
+  %tmp232 = add i32 %tmp231, 84
+  %tmp233 = urem i32 %tmp232, 101
+  %tmp234 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 0, i64 %tmp230
+  store i32 %tmp233, i32* %tmp234, align 8
+  %tmp235 = or i64 %tmp220, 3
+  %tmp236 = trunc i64 %tmp235 to i32
+  %tmp237 = sub i32 84, %tmp236
+  %tmp238 = urem i32 %tmp237, 101
+  %tmp239 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 0, i64 %tmp235
+  store i32 %tmp238, i32* %tmp239, align 4
+  %tmp240 = add nuw nsw i64 %tmp220, 4
+  %tmp241 = icmp eq i64 %tmp240, 10000
+  br i1 %tmp241, label %bb242, label %bb219
+
+bb242:                                            ; preds = %bb242, %bb219
+  %tmp243 = phi i64 [ %tmp263, %bb242 ], [ 0, %bb219 ]
+  %tmp244 = trunc i64 %tmp243 to i32
+  %tmp245 = add i32 %tmp244, 94
+  %tmp246 = urem i32 %tmp245, 101
+  %tmp247 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp243
+  store i32 %tmp246, i32* %tmp247, align 16
+  %tmp248 = or i64 %tmp243, 1
+  %tmp249 = trunc i64 %tmp248 to i32
+  %tmp250 = sub i32 94, %tmp249
+  %tmp251 = urem i32 %tmp250, 101
+  %tmp252 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp248
+  store i32 %tmp251, i32* %tmp252, align 4
+  %tmp253 = or i64 %tmp243, 2
+  %tmp254 = trunc i64 %tmp253 to i32
+  %tmp255 = add i32 %tmp254, 94
+  %tmp256 = urem i32 %tmp255, 101
+  %tmp257 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp253
+  store i32 %tmp256, i32* %tmp257, align 8
+  %tmp258 = or i64 %tmp243, 3
+  %tmp259 = trunc i64 %tmp258 to i32
+  %tmp260 = sub i32 94, %tmp259
+  %tmp261 = urem i32 %tmp260, 101
+  %tmp262 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp258
+  store i32 %tmp261, i32* %tmp262, align 4
+  %tmp263 = add nuw nsw i64 %tmp243, 4
+  %tmp264 = icmp eq i64 %tmp263, 100
+  br i1 %tmp264, label %bb265, label %bb242
+
+bb265:                                            ; preds = %bb265, %bb242
+  %tmp266 = phi i64 [ %tmp286, %bb265 ], [ 0, %bb242 ]
+  %tmp267 = trunc i64 %tmp266 to i32
+  %tmp268 = add i32 %tmp267, 92
+  %tmp269 = urem i32 %tmp268, 101
+  %tmp270 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 0, i64 %tmp266
+  store i32 %tmp269, i32* %tmp270, align 16
+  %tmp271 = or i64 %tmp266, 1
+  %tmp272 = trunc i64 %tmp271 to i32
+  %tmp273 = sub i32 92, %tmp272
+  %tmp274 = urem i32 %tmp273, 101
+  %tmp275 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 0, i64 %tmp271
+  store i32 %tmp274, i32* %tmp275, align 4
+  %tmp276 = or i64 %tmp266, 2
+  %tmp277 = trunc i64 %tmp276 to i32
+  %tmp278 = add i32 %tmp277, 92
+  %tmp279 = urem i32 %tmp278, 101
+  %tmp280 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 0, i64 %tmp276
+  store i32 %tmp279, i32* %tmp280, align 8
+  %tmp281 = or i64 %tmp266, 3
+  %tmp282 = trunc i64 %tmp281 to i32
+  %tmp283 = sub i32 92, %tmp282
+  %tmp284 = urem i32 %tmp283, 101
+  %tmp285 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 0, i64 %tmp281
+  store i32 %tmp284, i32* %tmp285, align 4
+  %tmp286 = add nuw nsw i64 %tmp266, 4
+  %tmp287 = icmp eq i64 %tmp286, 10000
+  br i1 %tmp287, label %bb288, label %bb265
+
+bb288:                                            ; preds = %bb288, %bb265
+  %tmp289 = phi i64 [ %tmp309, %bb288 ], [ 0, %bb265 ]
+  %tmp290 = trunc i64 %tmp289 to i32
+  %tmp291 = add i32 %tmp290, 87
+  %tmp292 = urem i32 %tmp291, 101
+  %tmp293 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 0, i64 %tmp289
+  store i32 %tmp292, i32* %tmp293, align 16
+  %tmp294 = or i64 %tmp289, 1
+  %tmp295 = trunc i64 %tmp294 to i32
+  %tmp296 = sub i32 87, %tmp295
+  %tmp297 = urem i32 %tmp296, 101
+  %tmp298 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 0, i64 %tmp294
+  store i32 %tmp297, i32* %tmp298, align 4
+  %tmp299 = or i64 %tmp289, 2
+  %tmp300 = trunc i64 %tmp299 to i32
+  %tmp301 = add i32 %tmp300, 87
+  %tmp302 = urem i32 %tmp301, 101
+  %tmp303 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 0, i64 %tmp299
+  store i32 %tmp302, i32* %tmp303, align 8
+  %tmp304 = or i64 %tmp289, 3
+  %tmp305 = trunc i64 %tmp304 to i32
+  %tmp306 = sub i32 87, %tmp305
+  %tmp307 = urem i32 %tmp306, 101
+  %tmp308 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 0, i64 %tmp304
+  store i32 %tmp307, i32* %tmp308, align 4
+  %tmp309 = add nuw nsw i64 %tmp289, 4
+  %tmp310 = icmp eq i64 %tmp309, 10000
+  br i1 %tmp310, label %bb311, label %bb288
+
+bb311:                                            ; preds = %bb311, %bb288
+  %tmp312 = phi i64 [ %tmp332, %bb311 ], [ 0, %bb288 ]
+  %tmp313 = trunc i64 %tmp312 to i32
+  %tmp314 = add i32 %tmp313, 28
+  %tmp315 = urem i32 %tmp314, 101
+  %tmp316 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 0, i64 %tmp312
+  store i32 %tmp315, i32* %tmp316, align 16
+  %tmp317 = or i64 %tmp312, 1
+  %tmp318 = trunc i64 %tmp317 to i32
+  %tmp319 = sub i32 28, %tmp318
+  %tmp320 = urem i32 %tmp319, 101
+  %tmp321 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 0, i64 %tmp317
+  store i32 %tmp320, i32* %tmp321, align 4
+  %tmp322 = or i64 %tmp312, 2
+  %tmp323 = trunc i64 %tmp322 to i32
+  %tmp324 = add i32 %tmp323, 28
+  %tmp325 = urem i32 %tmp324, 101
+  %tmp326 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 0, i64 %tmp322
+  store i32 %tmp325, i32* %tmp326, align 8
+  %tmp327 = or i64 %tmp312, 3
+  %tmp328 = trunc i64 %tmp327 to i32
+  %tmp329 = sub i32 28, %tmp328
+  %tmp330 = urem i32 %tmp329, 101
+  %tmp331 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 0, i64 %tmp327
+  store i32 %tmp330, i32* %tmp331, align 4
+  %tmp332 = add nuw nsw i64 %tmp312, 4
+  %tmp333 = icmp eq i64 %tmp332, 10000
+  br i1 %tmp333, label %bb334, label %bb311
+
+bb334:                                            ; preds = %bb311
+  %tmp335 = sub i32 87, %arg
+  %tmp336 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 69
+  %tmp337 = load i32, i32* %tmp336, align 4
+  %tmp338 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 68
+  %tmp339 = load i32, i32* %tmp338, align 16
+  br label %bb340
+
+bb340:                                            ; preds = %bb340, %bb334
+  %tmp341 = phi i32 [ %tmp339, %bb334 ], [ %tmp373, %bb340 ]
+  %tmp342 = phi i32 [ %tmp337, %bb334 ], [ %tmp379, %bb340 ]
+  %tmp343 = phi i64 [ 68, %bb334 ], [ %tmp371, %bb340 ]
+  %tmp344 = phi i32 [ %tmp335, %bb334 ], [ %tmp382, %bb340 ]
+  %tmp345 = phi i32 [ %arg2, %bb334 ], [ %tmp380, %bb340 ]
+  %tmp346 = add nsw i64 %tmp343, -1
+  %tmp347 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp346
+  %tmp348 = load i32, i32* %tmp347, align 4
+  %tmp349 = add nuw nsw i64 %tmp343, 1
+  %tmp350 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp349
+  %tmp351 = sub i32 %tmp342, %tmp348
+  store i32 %tmp351, i32* %tmp350, align 4
+  %tmp352 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp343
+  %tmp353 = load i32, i32* %tmp352, align 4
+  %tmp354 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp343
+  %tmp355 = add i32 %tmp341, %tmp353
+  store i32 %tmp355, i32* %tmp354, align 4
+  %tmp356 = add i32 %tmp345, -1
+  %tmp357 = sub i32 %tmp344, %tmp345
+  %tmp358 = sub i32 %tmp357, %tmp351
+  %tmp359 = add nsw i64 %tmp343, -2
+  %tmp360 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp359
+  %tmp361 = load i32, i32* %tmp360, align 4
+  %tmp362 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp343
+  %tmp363 = sub i32 %tmp355, %tmp361
+  store i32 %tmp363, i32* %tmp362, align 4
+  %tmp364 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp346
+  %tmp365 = load i32, i32* %tmp364, align 4
+  %tmp366 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp346
+  %tmp367 = add i32 %tmp348, %tmp365
+  store i32 %tmp367, i32* %tmp366, align 4
+  %tmp368 = add i32 %tmp345, -2
+  %tmp369 = sub i32 %tmp358, %tmp356
+  %tmp370 = sub i32 %tmp369, %tmp363
+  %tmp371 = add nsw i64 %tmp343, -3
+  %tmp372 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp371
+  %tmp373 = load i32, i32* %tmp372, align 4
+  %tmp374 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp346
+  %tmp375 = sub i32 %tmp367, %tmp373
+  store i32 %tmp375, i32* %tmp374, align 4
+  %tmp376 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp359
+  %tmp377 = load i32, i32* %tmp376, align 4
+  %tmp378 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp359
+  %tmp379 = add i32 %tmp361, %tmp377
+  store i32 %tmp379, i32* %tmp378, align 4
+  %tmp380 = add i32 %tmp345, -3
+  %tmp381 = sub i32 %tmp370, %tmp368
+  %tmp382 = sub i32 %tmp381, %tmp375
+  %tmp383 = icmp ugt i64 %tmp371, 2
+  br i1 %tmp383, label %bb340, label %bb384
+
+bb384:                                            ; preds = %bb340
+  %tmp385 = add i32 %arg2, -66
+  %tmp386 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 52
+  %tmp387 = load i32, i32* %tmp386, align 16
+  store i32 %tmp387, i32* %tmp32, align 4
+  %tmp388 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 97
+  %tmp389 = load i32, i32* %tmp388, align 4
+  %tmp390 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 31
+  %tmp391 = load i32, i32* %tmp390, align 4
+  %tmp392 = icmp eq i32 %tmp389, %tmp391
+  br i1 %tmp392, label %bb478, label %bb393
+
+bb393:                                            ; preds = %bb384
+  %tmp394 = sub i32 -79, %tmp382
+  %tmp395 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 2
+  %tmp396 = bitcast i32* %tmp395 to i8*
+  %tmp397 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 2
+  %tmp398 = bitcast i32* %tmp397 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %tmp396, i8* nonnull align 8 %tmp398, i64 304, i1 false)
+  br label %bb399
+
+bb399:                                            ; preds = %bb424, %bb393
+  %tmp400 = phi i64 [ 77, %bb393 ], [ %tmp425, %bb424 ]
+  br label %bb403
+
+bb401:                                            ; preds = %bb424
+  %tmp402 = add i32 %arg2, 3
+  br label %bb433
+
+bb403:                                            ; preds = %bb403, %bb399
+  %tmp404 = phi i64 [ 1, %bb399 ], [ %tmp414, %bb403 ]
+  %tmp405 = add nuw nsw i64 %tmp404, 1
+  %tmp406 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 %tmp404, i64 %tmp405
+  %tmp407 = load i32, i32* %tmp406, align 4
+  %tmp408 = add i32 %tmp394, %tmp407
+  store i32 %tmp408, i32* %tmp406, align 4
+  %tmp409 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 %tmp404, i64 %tmp405
+  %tmp410 = load i32, i32* %tmp409, align 4
+  %tmp411 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp405
+  %tmp412 = load i32, i32* %tmp411, align 4
+  %tmp413 = add i32 %tmp412, %tmp410
+  store i32 %tmp413, i32* %tmp411, align 4
+  %tmp414 = add nuw nsw i64 %tmp404, 2
+  %tmp415 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 %tmp405, i64 %tmp414
+  %tmp416 = load i32, i32* %tmp415, align 4
+  %tmp417 = add i32 %tmp394, %tmp416
+  store i32 %tmp417, i32* %tmp415, align 4
+  %tmp418 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 %tmp405, i64 %tmp414
+  %tmp419 = load i32, i32* %tmp418, align 4
+  %tmp420 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp414
+  %tmp421 = load i32, i32* %tmp420, align 4
+  %tmp422 = add i32 %tmp421, %tmp419
+  store i32 %tmp422, i32* %tmp420, align 4
+  %tmp423 = icmp eq i64 %tmp414, 47
+  br i1 %tmp423, label %bb424, label %bb403
+
+bb424:                                            ; preds = %bb403
+  %tmp425 = add nsw i64 %tmp400, -1
+  %tmp426 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp425
+  %tmp427 = load i32, i32* %tmp426, align 4
+  %tmp428 = add i32 %tmp427, 2
+  %tmp429 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp425
+  %tmp430 = load i32, i32* %tmp429, align 4
+  %tmp431 = mul i32 %tmp430, %tmp428
+  store i32 %tmp431, i32* %tmp429, align 4
+  %tmp432 = icmp ugt i64 %tmp425, 1
+  br i1 %tmp432, label %bb399, label %bb401
+
+bb433:                                            ; preds = %bb475, %bb401
+  %tmp434 = phi i64 [ 2, %bb401 ], [ %tmp437, %bb475 ]
+  %tmp435 = phi i32 [ 2, %bb401 ], [ %tmp476, %bb475 ]
+  %tmp436 = add nsw i64 %tmp434, -1
+  %tmp437 = add nuw nsw i64 %tmp434, 1
+  %tmp438 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 %tmp437, i64 %tmp434
+  %tmp439 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 %tmp436, i64 %tmp437
+  %tmp440 = mul i32 %tmp435, 47
+  br label %bb441
+
+bb441:                                            ; preds = %bb473, %bb433
+  %tmp442 = phi i64 [ 1, %bb433 ], [ %tmp450, %bb473 ]
+  %tmp443 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp442
+  %tmp444 = load i32, i32* %tmp443, align 4
+  %tmp445 = add nsw i64 %tmp442, -1
+  %tmp446 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp445
+  %tmp447 = load i32, i32* %tmp446, align 4
+  %tmp448 = xor i32 %tmp444, -1
+  %tmp449 = add i32 %tmp447, %tmp448
+  store i32 %tmp449, i32* %tmp446, align 4
+  %tmp450 = add nuw nsw i64 %tmp442, 1
+  %tmp451 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 %tmp436, i64 %tmp450
+  %tmp452 = load i32, i32* %tmp451, align 4
+  %tmp453 = mul i32 %tmp452, 91
+  %tmp454 = icmp eq i32 %tmp453, -30
+  br i1 %tmp454, label %bb455, label %bb473
+
+bb455:                                            ; preds = %bb441
+  %tmp456 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp442
+  %tmp457 = load i32, i32* %tmp456, align 4
+  %tmp458 = icmp ugt i32 %tmp457, %tmp402
+  br i1 %tmp458, label %bb459, label %bb473
+
+bb459:                                            ; preds = %bb455
+  %tmp460 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 %tmp445, i64 %tmp436
+  store i32 %tmp387, i32* %tmp460, align 4
+  %tmp461 = load i32, i32* %tmp57, align 4
+  %tmp462 = load i32, i32* %tmp438, align 4
+  %tmp463 = add i32 %tmp462, %tmp461
+  %tmp464 = load i32, i32* %tmp439, align 4
+  %tmp465 = add i32 %tmp464, 68
+  %tmp466 = icmp eq i32 %tmp463, %tmp465
+  br i1 %tmp466, label %bb471, label %bb467
+
+bb467:                                            ; preds = %bb459
+  %tmp468 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp450
+  %tmp469 = load i32, i32* %tmp468, align 4
+  %tmp470 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp445
+  store i32 %tmp469, i32* %tmp470, align 4
+  br label %bb473
+
+bb471:                                            ; preds = %bb459
+  %tmp472 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 %tmp437, i64 %tmp445
+  store i32 %tmp440, i32* %tmp472, align 4
+  br label %bb473
+
+bb473:                                            ; preds = %bb471, %bb467, %bb455, %bb441
+  %tmp474 = icmp eq i64 %tmp450, 13
+  br i1 %tmp474, label %bb475, label %bb441
+
+bb475:                                            ; preds = %bb473
+  %tmp476 = add nuw nsw i32 %tmp435, 1
+  %tmp477 = icmp eq i64 %tmp437, 69
+  br i1 %tmp477, label %bb478, label %bb433
+
+bb478:                                            ; preds = %bb475, %bb384
+  br label %bb479
+
+bb479:                                            ; preds = %bb479, %bb478
+  %tmp480 = phi i64 [ 0, %bb478 ], [ %tmp521, %bb479 ]
+  %tmp481 = phi i32 [ 0, %bb478 ], [ %tmp520, %bb479 ]
+  %tmp482 = and i64 %tmp480, 1
+  %tmp483 = icmp eq i64 %tmp482, 0
+  %tmp484 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp480
+  %tmp485 = load i32, i32* %tmp484, align 4
+  %tmp486 = sub i32 0, %tmp485
+  %tmp487 = select i1 %tmp483, i32 %tmp485, i32 %tmp486
+  %tmp488 = add i32 %tmp487, %tmp481
+  %tmp489 = add nuw nsw i64 %tmp480, 1
+  %tmp490 = and i64 %tmp489, 1
+  %tmp491 = icmp eq i64 %tmp490, 0
+  %tmp492 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp489
+  %tmp493 = load i32, i32* %tmp492, align 4
+  %tmp494 = sub i32 0, %tmp493
+  %tmp495 = select i1 %tmp491, i32 %tmp493, i32 %tmp494
+  %tmp496 = add i32 %tmp495, %tmp488
+  %tmp497 = add nuw nsw i64 %tmp480, 2
+  %tmp498 = and i64 %tmp497, 1
+  %tmp499 = icmp eq i64 %tmp498, 0
+  %tmp500 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp497
+  %tmp501 = load i32, i32* %tmp500, align 4
+  %tmp502 = sub i32 0, %tmp501
+  %tmp503 = select i1 %tmp499, i32 %tmp501, i32 %tmp502
+  %tmp504 = add i32 %tmp503, %tmp496
+  %tmp505 = add nuw nsw i64 %tmp480, 3
+  %tmp506 = and i64 %tmp505, 1
+  %tmp507 = icmp eq i64 %tmp506, 0
+  %tmp508 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp505
+  %tmp509 = load i32, i32* %tmp508, align 4
+  %tmp510 = sub i32 0, %tmp509
+  %tmp511 = select i1 %tmp507, i32 %tmp509, i32 %tmp510
+  %tmp512 = add i32 %tmp511, %tmp504
+  %tmp513 = add nuw nsw i64 %tmp480, 4
+  %tmp514 = and i64 %tmp513, 1
+  %tmp515 = icmp eq i64 %tmp514, 0
+  %tmp516 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp, i64 0, i64 %tmp513
+  %tmp517 = load i32, i32* %tmp516, align 4
+  %tmp518 = sub i32 0, %tmp517
+  %tmp519 = select i1 %tmp515, i32 %tmp517, i32 %tmp518
+  %tmp520 = add i32 %tmp519, %tmp512
+  %tmp521 = add nuw nsw i64 %tmp480, 5
+  %tmp522 = icmp eq i64 %tmp521, 100
+  br i1 %tmp522, label %bb523, label %bb479
+
+bb523:                                            ; preds = %bb523, %bb479
+  %tmp524 = phi i64 [ %tmp565, %bb523 ], [ 0, %bb479 ]
+  %tmp525 = phi i32 [ %tmp564, %bb523 ], [ 0, %bb479 ]
+  %tmp526 = and i64 %tmp524, 1
+  %tmp527 = icmp eq i64 %tmp526, 0
+  %tmp528 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp524
+  %tmp529 = load i32, i32* %tmp528, align 4
+  %tmp530 = sub i32 0, %tmp529
+  %tmp531 = select i1 %tmp527, i32 %tmp529, i32 %tmp530
+  %tmp532 = add i32 %tmp531, %tmp525
+  %tmp533 = add nuw nsw i64 %tmp524, 1
+  %tmp534 = and i64 %tmp533, 1
+  %tmp535 = icmp eq i64 %tmp534, 0
+  %tmp536 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp533
+  %tmp537 = load i32, i32* %tmp536, align 4
+  %tmp538 = sub i32 0, %tmp537
+  %tmp539 = select i1 %tmp535, i32 %tmp537, i32 %tmp538
+  %tmp540 = add i32 %tmp539, %tmp532
+  %tmp541 = add nuw nsw i64 %tmp524, 2
+  %tmp542 = and i64 %tmp541, 1
+  %tmp543 = icmp eq i64 %tmp542, 0
+  %tmp544 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp541
+  %tmp545 = load i32, i32* %tmp544, align 4
+  %tmp546 = sub i32 0, %tmp545
+  %tmp547 = select i1 %tmp543, i32 %tmp545, i32 %tmp546
+  %tmp548 = add i32 %tmp547, %tmp540
+  %tmp549 = add nuw nsw i64 %tmp524, 3
+  %tmp550 = and i64 %tmp549, 1
+  %tmp551 = icmp eq i64 %tmp550, 0
+  %tmp552 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp549
+  %tmp553 = load i32, i32* %tmp552, align 4
+  %tmp554 = sub i32 0, %tmp553
+  %tmp555 = select i1 %tmp551, i32 %tmp553, i32 %tmp554
+  %tmp556 = add i32 %tmp555, %tmp548
+  %tmp557 = add nuw nsw i64 %tmp524, 4
+  %tmp558 = and i64 %tmp557, 1
+  %tmp559 = icmp eq i64 %tmp558, 0
+  %tmp560 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp7, i64 0, i64 %tmp557
+  %tmp561 = load i32, i32* %tmp560, align 4
+  %tmp562 = sub i32 0, %tmp561
+  %tmp563 = select i1 %tmp559, i32 %tmp561, i32 %tmp562
+  %tmp564 = add i32 %tmp563, %tmp556
+  %tmp565 = add nuw nsw i64 %tmp524, 5
+  %tmp566 = icmp eq i64 %tmp565, 100
+  br i1 %tmp566, label %bb567, label %bb523
+
+bb567:                                            ; preds = %bb567, %bb523
+  %tmp568 = phi i64 [ %tmp609, %bb567 ], [ 0, %bb523 ]
+  %tmp569 = phi i32 [ %tmp608, %bb567 ], [ 0, %bb523 ]
+  %tmp570 = and i64 %tmp568, 1
+  %tmp571 = icmp eq i64 %tmp570, 0
+  %tmp572 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp568
+  %tmp573 = load i32, i32* %tmp572, align 4
+  %tmp574 = sub i32 0, %tmp573
+  %tmp575 = select i1 %tmp571, i32 %tmp573, i32 %tmp574
+  %tmp576 = add i32 %tmp575, %tmp569
+  %tmp577 = add nuw nsw i64 %tmp568, 1
+  %tmp578 = and i64 %tmp577, 1
+  %tmp579 = icmp eq i64 %tmp578, 0
+  %tmp580 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp577
+  %tmp581 = load i32, i32* %tmp580, align 4
+  %tmp582 = sub i32 0, %tmp581
+  %tmp583 = select i1 %tmp579, i32 %tmp581, i32 %tmp582
+  %tmp584 = add i32 %tmp583, %tmp576
+  %tmp585 = add nuw nsw i64 %tmp568, 2
+  %tmp586 = and i64 %tmp585, 1
+  %tmp587 = icmp eq i64 %tmp586, 0
+  %tmp588 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp585
+  %tmp589 = load i32, i32* %tmp588, align 4
+  %tmp590 = sub i32 0, %tmp589
+  %tmp591 = select i1 %tmp587, i32 %tmp589, i32 %tmp590
+  %tmp592 = add i32 %tmp591, %tmp584
+  %tmp593 = add nuw nsw i64 %tmp568, 3
+  %tmp594 = and i64 %tmp593, 1
+  %tmp595 = icmp eq i64 %tmp594, 0
+  %tmp596 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp593
+  %tmp597 = load i32, i32* %tmp596, align 4
+  %tmp598 = sub i32 0, %tmp597
+  %tmp599 = select i1 %tmp595, i32 %tmp597, i32 %tmp598
+  %tmp600 = add i32 %tmp599, %tmp592
+  %tmp601 = add nuw nsw i64 %tmp568, 4
+  %tmp602 = and i64 %tmp601, 1
+  %tmp603 = icmp eq i64 %tmp602, 0
+  %tmp604 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp8, i64 0, i64 %tmp601
+  %tmp605 = load i32, i32* %tmp604, align 4
+  %tmp606 = sub i32 0, %tmp605
+  %tmp607 = select i1 %tmp603, i32 %tmp605, i32 %tmp606
+  %tmp608 = add i32 %tmp607, %tmp600
+  %tmp609 = add nuw nsw i64 %tmp568, 5
+  %tmp610 = icmp eq i64 %tmp609, 100
+  br i1 %tmp610, label %bb611, label %bb567
+
+bb611:                                            ; preds = %bb611, %bb567
+  %tmp612 = phi i64 [ %tmp653, %bb611 ], [ 0, %bb567 ]
+  %tmp613 = phi i32 [ %tmp652, %bb611 ], [ 0, %bb567 ]
+  %tmp614 = and i64 %tmp612, 1
+  %tmp615 = icmp eq i64 %tmp614, 0
+  %tmp616 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 0, i64 %tmp612
+  %tmp617 = load i32, i32* %tmp616, align 4
+  %tmp618 = sub i32 0, %tmp617
+  %tmp619 = select i1 %tmp615, i32 %tmp617, i32 %tmp618
+  %tmp620 = add i32 %tmp619, %tmp613
+  %tmp621 = add nuw nsw i64 %tmp612, 1
+  %tmp622 = and i64 %tmp621, 1
+  %tmp623 = icmp eq i64 %tmp622, 0
+  %tmp624 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 0, i64 %tmp621
+  %tmp625 = load i32, i32* %tmp624, align 4
+  %tmp626 = sub i32 0, %tmp625
+  %tmp627 = select i1 %tmp623, i32 %tmp625, i32 %tmp626
+  %tmp628 = add i32 %tmp627, %tmp620
+  %tmp629 = add nuw nsw i64 %tmp612, 2
+  %tmp630 = and i64 %tmp629, 1
+  %tmp631 = icmp eq i64 %tmp630, 0
+  %tmp632 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 0, i64 %tmp629
+  %tmp633 = load i32, i32* %tmp632, align 4
+  %tmp634 = sub i32 0, %tmp633
+  %tmp635 = select i1 %tmp631, i32 %tmp633, i32 %tmp634
+  %tmp636 = add i32 %tmp635, %tmp628
+  %tmp637 = add nuw nsw i64 %tmp612, 3
+  %tmp638 = and i64 %tmp637, 1
+  %tmp639 = icmp eq i64 %tmp638, 0
+  %tmp640 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 0, i64 %tmp637
+  %tmp641 = load i32, i32* %tmp640, align 4
+  %tmp642 = sub i32 0, %tmp641
+  %tmp643 = select i1 %tmp639, i32 %tmp641, i32 %tmp642
+  %tmp644 = add i32 %tmp643, %tmp636
+  %tmp645 = add nuw nsw i64 %tmp612, 4
+  %tmp646 = and i64 %tmp645, 1
+  %tmp647 = icmp eq i64 %tmp646, 0
+  %tmp648 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp9, i64 0, i64 0, i64 %tmp645
+  %tmp649 = load i32, i32* %tmp648, align 4
+  %tmp650 = sub i32 0, %tmp649
+  %tmp651 = select i1 %tmp647, i32 %tmp649, i32 %tmp650
+  %tmp652 = add i32 %tmp651, %tmp644
+  %tmp653 = add nuw nsw i64 %tmp612, 5
+  %tmp654 = icmp eq i64 %tmp653, 10000
+  br i1 %tmp654, label %bb655, label %bb611
+
+bb655:                                            ; preds = %bb655, %bb611
+  %tmp656 = phi i64 [ %tmp697, %bb655 ], [ 0, %bb611 ]
+  %tmp657 = phi i32 [ %tmp696, %bb655 ], [ 0, %bb611 ]
+  %tmp658 = and i64 %tmp656, 1
+  %tmp659 = icmp eq i64 %tmp658, 0
+  %tmp660 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp656
+  %tmp661 = load i32, i32* %tmp660, align 4
+  %tmp662 = sub i32 0, %tmp661
+  %tmp663 = select i1 %tmp659, i32 %tmp661, i32 %tmp662
+  %tmp664 = add i32 %tmp663, %tmp657
+  %tmp665 = add nuw nsw i64 %tmp656, 1
+  %tmp666 = and i64 %tmp665, 1
+  %tmp667 = icmp eq i64 %tmp666, 0
+  %tmp668 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp665
+  %tmp669 = load i32, i32* %tmp668, align 4
+  %tmp670 = sub i32 0, %tmp669
+  %tmp671 = select i1 %tmp667, i32 %tmp669, i32 %tmp670
+  %tmp672 = add i32 %tmp671, %tmp664
+  %tmp673 = add nuw nsw i64 %tmp656, 2
+  %tmp674 = and i64 %tmp673, 1
+  %tmp675 = icmp eq i64 %tmp674, 0
+  %tmp676 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp673
+  %tmp677 = load i32, i32* %tmp676, align 4
+  %tmp678 = sub i32 0, %tmp677
+  %tmp679 = select i1 %tmp675, i32 %tmp677, i32 %tmp678
+  %tmp680 = add i32 %tmp679, %tmp672
+  %tmp681 = add nuw nsw i64 %tmp656, 3
+  %tmp682 = and i64 %tmp681, 1
+  %tmp683 = icmp eq i64 %tmp682, 0
+  %tmp684 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp681
+  %tmp685 = load i32, i32* %tmp684, align 4
+  %tmp686 = sub i32 0, %tmp685
+  %tmp687 = select i1 %tmp683, i32 %tmp685, i32 %tmp686
+  %tmp688 = add i32 %tmp687, %tmp680
+  %tmp689 = add nuw nsw i64 %tmp656, 4
+  %tmp690 = and i64 %tmp689, 1
+  %tmp691 = icmp eq i64 %tmp690, 0
+  %tmp692 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp10, i64 0, i64 %tmp689
+  %tmp693 = load i32, i32* %tmp692, align 4
+  %tmp694 = sub i32 0, %tmp693
+  %tmp695 = select i1 %tmp691, i32 %tmp693, i32 %tmp694
+  %tmp696 = add i32 %tmp695, %tmp688
+  %tmp697 = add nuw nsw i64 %tmp656, 5
+  %tmp698 = icmp eq i64 %tmp697, 100
+  br i1 %tmp698, label %bb699, label %bb655
+
+bb699:                                            ; preds = %bb699, %bb655
+  %tmp700 = phi i64 [ %tmp741, %bb699 ], [ 0, %bb655 ]
+  %tmp701 = phi i32 [ %tmp740, %bb699 ], [ 0, %bb655 ]
+  %tmp702 = and i64 %tmp700, 1
+  %tmp703 = icmp eq i64 %tmp702, 0
+  %tmp704 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 0, i64 %tmp700
+  %tmp705 = load i32, i32* %tmp704, align 4
+  %tmp706 = sub i32 0, %tmp705
+  %tmp707 = select i1 %tmp703, i32 %tmp705, i32 %tmp706
+  %tmp708 = add i32 %tmp707, %tmp701
+  %tmp709 = add nuw nsw i64 %tmp700, 1
+  %tmp710 = and i64 %tmp709, 1
+  %tmp711 = icmp eq i64 %tmp710, 0
+  %tmp712 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 0, i64 %tmp709
+  %tmp713 = load i32, i32* %tmp712, align 4
+  %tmp714 = sub i32 0, %tmp713
+  %tmp715 = select i1 %tmp711, i32 %tmp713, i32 %tmp714
+  %tmp716 = add i32 %tmp715, %tmp708
+  %tmp717 = add nuw nsw i64 %tmp700, 2
+  %tmp718 = and i64 %tmp717, 1
+  %tmp719 = icmp eq i64 %tmp718, 0
+  %tmp720 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 0, i64 %tmp717
+  %tmp721 = load i32, i32* %tmp720, align 4
+  %tmp722 = sub i32 0, %tmp721
+  %tmp723 = select i1 %tmp719, i32 %tmp721, i32 %tmp722
+  %tmp724 = add i32 %tmp723, %tmp716
+  %tmp725 = add nuw nsw i64 %tmp700, 3
+  %tmp726 = and i64 %tmp725, 1
+  %tmp727 = icmp eq i64 %tmp726, 0
+  %tmp728 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 0, i64 %tmp725
+  %tmp729 = load i32, i32* %tmp728, align 4
+  %tmp730 = sub i32 0, %tmp729
+  %tmp731 = select i1 %tmp727, i32 %tmp729, i32 %tmp730
+  %tmp732 = add i32 %tmp731, %tmp724
+  %tmp733 = add nuw nsw i64 %tmp700, 4
+  %tmp734 = and i64 %tmp733, 1
+  %tmp735 = icmp eq i64 %tmp734, 0
+  %tmp736 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp11, i64 0, i64 0, i64 %tmp733
+  %tmp737 = load i32, i32* %tmp736, align 4
+  %tmp738 = sub i32 0, %tmp737
+  %tmp739 = select i1 %tmp735, i32 %tmp737, i32 %tmp738
+  %tmp740 = add i32 %tmp739, %tmp732
+  %tmp741 = add nuw nsw i64 %tmp700, 5
+  %tmp742 = icmp eq i64 %tmp741, 10000
+  br i1 %tmp742, label %bb743, label %bb699
+
+bb743:                                            ; preds = %bb743, %bb699
+  %tmp744 = phi i64 [ %tmp785, %bb743 ], [ 0, %bb699 ]
+  %tmp745 = phi i32 [ %tmp784, %bb743 ], [ 0, %bb699 ]
+  %tmp746 = and i64 %tmp744, 1
+  %tmp747 = icmp eq i64 %tmp746, 0
+  %tmp748 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 %tmp744
+  %tmp749 = load i32, i32* %tmp748, align 4
+  %tmp750 = sub i32 0, %tmp749
+  %tmp751 = select i1 %tmp747, i32 %tmp749, i32 %tmp750
+  %tmp752 = add i32 %tmp751, %tmp745
+  %tmp753 = add nuw nsw i64 %tmp744, 1
+  %tmp754 = and i64 %tmp753, 1
+  %tmp755 = icmp eq i64 %tmp754, 0
+  %tmp756 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 %tmp753
+  %tmp757 = load i32, i32* %tmp756, align 4
+  %tmp758 = sub i32 0, %tmp757
+  %tmp759 = select i1 %tmp755, i32 %tmp757, i32 %tmp758
+  %tmp760 = add i32 %tmp759, %tmp752
+  %tmp761 = add nuw nsw i64 %tmp744, 2
+  %tmp762 = and i64 %tmp761, 1
+  %tmp763 = icmp eq i64 %tmp762, 0
+  %tmp764 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 %tmp761
+  %tmp765 = load i32, i32* %tmp764, align 4
+  %tmp766 = sub i32 0, %tmp765
+  %tmp767 = select i1 %tmp763, i32 %tmp765, i32 %tmp766
+  %tmp768 = add i32 %tmp767, %tmp760
+  %tmp769 = add nuw nsw i64 %tmp744, 3
+  %tmp770 = and i64 %tmp769, 1
+  %tmp771 = icmp eq i64 %tmp770, 0
+  %tmp772 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 %tmp769
+  %tmp773 = load i32, i32* %tmp772, align 4
+  %tmp774 = sub i32 0, %tmp773
+  %tmp775 = select i1 %tmp771, i32 %tmp773, i32 %tmp774
+  %tmp776 = add i32 %tmp775, %tmp768
+  %tmp777 = add nuw nsw i64 %tmp744, 4
+  %tmp778 = and i64 %tmp777, 1
+  %tmp779 = icmp eq i64 %tmp778, 0
+  %tmp780 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp12, i64 0, i64 %tmp777
+  %tmp781 = load i32, i32* %tmp780, align 4
+  %tmp782 = sub i32 0, %tmp781
+  %tmp783 = select i1 %tmp779, i32 %tmp781, i32 %tmp782
+  %tmp784 = add i32 %tmp783, %tmp776
+  %tmp785 = add nuw nsw i64 %tmp744, 5
+  %tmp786 = icmp eq i64 %tmp785, 100
+  br i1 %tmp786, label %bb787, label %bb743
+
+bb787:                                            ; preds = %bb787, %bb743
+  %tmp788 = phi i64 [ %tmp829, %bb787 ], [ 0, %bb743 ]
+  %tmp789 = phi i32 [ %tmp828, %bb787 ], [ 0, %bb743 ]
+  %tmp790 = and i64 %tmp788, 1
+  %tmp791 = icmp eq i64 %tmp790, 0
+  %tmp792 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp788
+  %tmp793 = load i32, i32* %tmp792, align 4
+  %tmp794 = sub i32 0, %tmp793
+  %tmp795 = select i1 %tmp791, i32 %tmp793, i32 %tmp794
+  %tmp796 = add i32 %tmp795, %tmp789
+  %tmp797 = add nuw nsw i64 %tmp788, 1
+  %tmp798 = and i64 %tmp797, 1
+  %tmp799 = icmp eq i64 %tmp798, 0
+  %tmp800 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp797
+  %tmp801 = load i32, i32* %tmp800, align 4
+  %tmp802 = sub i32 0, %tmp801
+  %tmp803 = select i1 %tmp799, i32 %tmp801, i32 %tmp802
+  %tmp804 = add i32 %tmp803, %tmp796
+  %tmp805 = add nuw nsw i64 %tmp788, 2
+  %tmp806 = and i64 %tmp805, 1
+  %tmp807 = icmp eq i64 %tmp806, 0
+  %tmp808 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp805
+  %tmp809 = load i32, i32* %tmp808, align 4
+  %tmp810 = sub i32 0, %tmp809
+  %tmp811 = select i1 %tmp807, i32 %tmp809, i32 %tmp810
+  %tmp812 = add i32 %tmp811, %tmp804
+  %tmp813 = add nuw nsw i64 %tmp788, 3
+  %tmp814 = and i64 %tmp813, 1
+  %tmp815 = icmp eq i64 %tmp814, 0
+  %tmp816 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp813
+  %tmp817 = load i32, i32* %tmp816, align 4
+  %tmp818 = sub i32 0, %tmp817
+  %tmp819 = select i1 %tmp815, i32 %tmp817, i32 %tmp818
+  %tmp820 = add i32 %tmp819, %tmp812
+  %tmp821 = add nuw nsw i64 %tmp788, 4
+  %tmp822 = and i64 %tmp821, 1
+  %tmp823 = icmp eq i64 %tmp822, 0
+  %tmp824 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp13, i64 0, i64 %tmp821
+  %tmp825 = load i32, i32* %tmp824, align 4
+  %tmp826 = sub i32 0, %tmp825
+  %tmp827 = select i1 %tmp823, i32 %tmp825, i32 %tmp826
+  %tmp828 = add i32 %tmp827, %tmp820
+  %tmp829 = add nuw nsw i64 %tmp788, 5
+  %tmp830 = icmp eq i64 %tmp829, 100
+  br i1 %tmp830, label %bb831, label %bb787
+
+bb831:                                            ; preds = %bb831, %bb787
+  %tmp832 = phi i64 [ %tmp873, %bb831 ], [ 0, %bb787 ]
+  %tmp833 = phi i32 [ %tmp872, %bb831 ], [ 0, %bb787 ]
+  %tmp834 = and i64 %tmp832, 1
+  %tmp835 = icmp eq i64 %tmp834, 0
+  %tmp836 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 0, i64 %tmp832
+  %tmp837 = load i32, i32* %tmp836, align 4
+  %tmp838 = sub i32 0, %tmp837
+  %tmp839 = select i1 %tmp835, i32 %tmp837, i32 %tmp838
+  %tmp840 = add i32 %tmp839, %tmp833
+  %tmp841 = add nuw nsw i64 %tmp832, 1
+  %tmp842 = and i64 %tmp841, 1
+  %tmp843 = icmp eq i64 %tmp842, 0
+  %tmp844 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 0, i64 %tmp841
+  %tmp845 = load i32, i32* %tmp844, align 4
+  %tmp846 = sub i32 0, %tmp845
+  %tmp847 = select i1 %tmp843, i32 %tmp845, i32 %tmp846
+  %tmp848 = add i32 %tmp847, %tmp840
+  %tmp849 = add nuw nsw i64 %tmp832, 2
+  %tmp850 = and i64 %tmp849, 1
+  %tmp851 = icmp eq i64 %tmp850, 0
+  %tmp852 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 0, i64 %tmp849
+  %tmp853 = load i32, i32* %tmp852, align 4
+  %tmp854 = sub i32 0, %tmp853
+  %tmp855 = select i1 %tmp851, i32 %tmp853, i32 %tmp854
+  %tmp856 = add i32 %tmp855, %tmp848
+  %tmp857 = add nuw nsw i64 %tmp832, 3
+  %tmp858 = and i64 %tmp857, 1
+  %tmp859 = icmp eq i64 %tmp858, 0
+  %tmp860 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 0, i64 %tmp857
+  %tmp861 = load i32, i32* %tmp860, align 4
+  %tmp862 = sub i32 0, %tmp861
+  %tmp863 = select i1 %tmp859, i32 %tmp861, i32 %tmp862
+  %tmp864 = add i32 %tmp863, %tmp856
+  %tmp865 = add nuw nsw i64 %tmp832, 4
+  %tmp866 = and i64 %tmp865, 1
+  %tmp867 = icmp eq i64 %tmp866, 0
+  %tmp868 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp14, i64 0, i64 0, i64 %tmp865
+  %tmp869 = load i32, i32* %tmp868, align 4
+  %tmp870 = sub i32 0, %tmp869
+  %tmp871 = select i1 %tmp867, i32 %tmp869, i32 %tmp870
+  %tmp872 = add i32 %tmp871, %tmp864
+  %tmp873 = add nuw nsw i64 %tmp832, 5
+  %tmp874 = icmp eq i64 %tmp873, 10000
+  br i1 %tmp874, label %bb875, label %bb831
+
+bb875:                                            ; preds = %bb875, %bb831
+  %tmp876 = phi i64 [ %tmp917, %bb875 ], [ 0, %bb831 ]
+  %tmp877 = phi i32 [ %tmp916, %bb875 ], [ 0, %bb831 ]
+  %tmp878 = and i64 %tmp876, 1
+  %tmp879 = icmp eq i64 %tmp878, 0
+  %tmp880 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp876
+  %tmp881 = load i32, i32* %tmp880, align 4
+  %tmp882 = sub i32 0, %tmp881
+  %tmp883 = select i1 %tmp879, i32 %tmp881, i32 %tmp882
+  %tmp884 = add i32 %tmp883, %tmp877
+  %tmp885 = add nuw nsw i64 %tmp876, 1
+  %tmp886 = and i64 %tmp885, 1
+  %tmp887 = icmp eq i64 %tmp886, 0
+  %tmp888 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp885
+  %tmp889 = load i32, i32* %tmp888, align 4
+  %tmp890 = sub i32 0, %tmp889
+  %tmp891 = select i1 %tmp887, i32 %tmp889, i32 %tmp890
+  %tmp892 = add i32 %tmp891, %tmp884
+  %tmp893 = add nuw nsw i64 %tmp876, 2
+  %tmp894 = and i64 %tmp893, 1
+  %tmp895 = icmp eq i64 %tmp894, 0
+  %tmp896 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp893
+  %tmp897 = load i32, i32* %tmp896, align 4
+  %tmp898 = sub i32 0, %tmp897
+  %tmp899 = select i1 %tmp895, i32 %tmp897, i32 %tmp898
+  %tmp900 = add i32 %tmp899, %tmp892
+  %tmp901 = add nuw nsw i64 %tmp876, 3
+  %tmp902 = and i64 %tmp901, 1
+  %tmp903 = icmp eq i64 %tmp902, 0
+  %tmp904 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp901
+  %tmp905 = load i32, i32* %tmp904, align 4
+  %tmp906 = sub i32 0, %tmp905
+  %tmp907 = select i1 %tmp903, i32 %tmp905, i32 %tmp906
+  %tmp908 = add i32 %tmp907, %tmp900
+  %tmp909 = add nuw nsw i64 %tmp876, 4
+  %tmp910 = and i64 %tmp909, 1
+  %tmp911 = icmp eq i64 %tmp910, 0
+  %tmp912 = getelementptr inbounds [100 x i32], [100 x i32]* %tmp15, i64 0, i64 %tmp909
+  %tmp913 = load i32, i32* %tmp912, align 4
+  %tmp914 = sub i32 0, %tmp913
+  %tmp915 = select i1 %tmp911, i32 %tmp913, i32 %tmp914
+  %tmp916 = add i32 %tmp915, %tmp908
+  %tmp917 = add nuw nsw i64 %tmp876, 5
+  %tmp918 = icmp eq i64 %tmp917, 100
+  br i1 %tmp918, label %bb919, label %bb875
+
+bb919:                                            ; preds = %bb919, %bb875
+  %tmp920 = phi i64 [ %tmp961, %bb919 ], [ 0, %bb875 ]
+  %tmp921 = phi i32 [ %tmp960, %bb919 ], [ 0, %bb875 ]
+  %tmp922 = and i64 %tmp920, 1
+  %tmp923 = icmp eq i64 %tmp922, 0
+  %tmp924 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 0, i64 %tmp920
+  %tmp925 = load i32, i32* %tmp924, align 4
+  %tmp926 = sub i32 0, %tmp925
+  %tmp927 = select i1 %tmp923, i32 %tmp925, i32 %tmp926
+  %tmp928 = add i32 %tmp927, %tmp921
+  %tmp929 = add nuw nsw i64 %tmp920, 1
+  %tmp930 = and i64 %tmp929, 1
+  %tmp931 = icmp eq i64 %tmp930, 0
+  %tmp932 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 0, i64 %tmp929
+  %tmp933 = load i32, i32* %tmp932, align 4
+  %tmp934 = sub i32 0, %tmp933
+  %tmp935 = select i1 %tmp931, i32 %tmp933, i32 %tmp934
+  %tmp936 = add i32 %tmp935, %tmp928
+  %tmp937 = add nuw nsw i64 %tmp920, 2
+  %tmp938 = and i64 %tmp937, 1
+  %tmp939 = icmp eq i64 %tmp938, 0
+  %tmp940 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 0, i64 %tmp937
+  %tmp941 = load i32, i32* %tmp940, align 4
+  %tmp942 = sub i32 0, %tmp941
+  %tmp943 = select i1 %tmp939, i32 %tmp941, i32 %tmp942
+  %tmp944 = add i32 %tmp943, %tmp936
+  %tmp945 = add nuw nsw i64 %tmp920, 3
+  %tmp946 = and i64 %tmp945, 1
+  %tmp947 = icmp eq i64 %tmp946, 0
+  %tmp948 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 0, i64 %tmp945
+  %tmp949 = load i32, i32* %tmp948, align 4
+  %tmp950 = sub i32 0, %tmp949
+  %tmp951 = select i1 %tmp947, i32 %tmp949, i32 %tmp950
+  %tmp952 = add i32 %tmp951, %tmp944
+  %tmp953 = add nuw nsw i64 %tmp920, 4
+  %tmp954 = and i64 %tmp953, 1
+  %tmp955 = icmp eq i64 %tmp954, 0
+  %tmp956 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp16, i64 0, i64 0, i64 %tmp953
+  %tmp957 = load i32, i32* %tmp956, align 4
+  %tmp958 = sub i32 0, %tmp957
+  %tmp959 = select i1 %tmp955, i32 %tmp957, i32 %tmp958
+  %tmp960 = add i32 %tmp959, %tmp952
+  %tmp961 = add nuw nsw i64 %tmp920, 5
+  %tmp962 = icmp eq i64 %tmp961, 10000
+  br i1 %tmp962, label %bb963, label %bb919
+
+bb963:                                            ; preds = %bb963, %bb919
+  %tmp964 = phi i64 [ %tmp1005, %bb963 ], [ 0, %bb919 ]
+  %tmp965 = phi i32 [ %tmp1004, %bb963 ], [ 0, %bb919 ]
+  %tmp966 = and i64 %tmp964, 1
+  %tmp967 = icmp eq i64 %tmp966, 0
+  %tmp968 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 0, i64 %tmp964
+  %tmp969 = load i32, i32* %tmp968, align 4
+  %tmp970 = sub i32 0, %tmp969
+  %tmp971 = select i1 %tmp967, i32 %tmp969, i32 %tmp970
+  %tmp972 = add i32 %tmp971, %tmp965
+  %tmp973 = add nuw nsw i64 %tmp964, 1
+  %tmp974 = and i64 %tmp973, 1
+  %tmp975 = icmp eq i64 %tmp974, 0
+  %tmp976 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 0, i64 %tmp973
+  %tmp977 = load i32, i32* %tmp976, align 4
+  %tmp978 = sub i32 0, %tmp977
+  %tmp979 = select i1 %tmp975, i32 %tmp977, i32 %tmp978
+  %tmp980 = add i32 %tmp979, %tmp972
+  %tmp981 = add nuw nsw i64 %tmp964, 2
+  %tmp982 = and i64 %tmp981, 1
+  %tmp983 = icmp eq i64 %tmp982, 0
+  %tmp984 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 0, i64 %tmp981
+  %tmp985 = load i32, i32* %tmp984, align 4
+  %tmp986 = sub i32 0, %tmp985
+  %tmp987 = select i1 %tmp983, i32 %tmp985, i32 %tmp986
+  %tmp988 = add i32 %tmp987, %tmp980
+  %tmp989 = add nuw nsw i64 %tmp964, 3
+  %tmp990 = and i64 %tmp989, 1
+  %tmp991 = icmp eq i64 %tmp990, 0
+  %tmp992 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 0, i64 %tmp989
+  %tmp993 = load i32, i32* %tmp992, align 4
+  %tmp994 = sub i32 0, %tmp993
+  %tmp995 = select i1 %tmp991, i32 %tmp993, i32 %tmp994
+  %tmp996 = add i32 %tmp995, %tmp988
+  %tmp997 = add nuw nsw i64 %tmp964, 4
+  %tmp998 = and i64 %tmp997, 1
+  %tmp999 = icmp eq i64 %tmp998, 0
+  %tmp1000 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp17, i64 0, i64 0, i64 %tmp997
+  %tmp1001 = load i32, i32* %tmp1000, align 4
+  %tmp1002 = sub i32 0, %tmp1001
+  %tmp1003 = select i1 %tmp999, i32 %tmp1001, i32 %tmp1002
+  %tmp1004 = add i32 %tmp1003, %tmp996
+  %tmp1005 = add nuw nsw i64 %tmp964, 5
+  %tmp1006 = icmp eq i64 %tmp1005, 10000
+  br i1 %tmp1006, label %bb1007, label %bb963
+
+bb1007:                                           ; preds = %bb1007, %bb963
+  %tmp1008 = phi i64 [ %tmp1049, %bb1007 ], [ 0, %bb963 ]
+  %tmp1009 = phi i32 [ %tmp1048, %bb1007 ], [ 0, %bb963 ]
+  %tmp1010 = and i64 %tmp1008, 1
+  %tmp1011 = icmp eq i64 %tmp1010, 0
+  %tmp1012 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 0, i64 %tmp1008
+  %tmp1013 = load i32, i32* %tmp1012, align 4
+  %tmp1014 = sub i32 0, %tmp1013
+  %tmp1015 = select i1 %tmp1011, i32 %tmp1013, i32 %tmp1014
+  %tmp1016 = add i32 %tmp1015, %tmp1009
+  %tmp1017 = add nuw nsw i64 %tmp1008, 1
+  %tmp1018 = and i64 %tmp1017, 1
+  %tmp1019 = icmp eq i64 %tmp1018, 0
+  %tmp1020 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 0, i64 %tmp1017
+  %tmp1021 = load i32, i32* %tmp1020, align 4
+  %tmp1022 = sub i32 0, %tmp1021
+  %tmp1023 = select i1 %tmp1019, i32 %tmp1021, i32 %tmp1022
+  %tmp1024 = add i32 %tmp1023, %tmp1016
+  %tmp1025 = add nuw nsw i64 %tmp1008, 2
+  %tmp1026 = and i64 %tmp1025, 1
+  %tmp1027 = icmp eq i64 %tmp1026, 0
+  %tmp1028 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 0, i64 %tmp1025
+  %tmp1029 = load i32, i32* %tmp1028, align 4
+  %tmp1030 = sub i32 0, %tmp1029
+  %tmp1031 = select i1 %tmp1027, i32 %tmp1029, i32 %tmp1030
+  %tmp1032 = add i32 %tmp1031, %tmp1024
+  %tmp1033 = add nuw nsw i64 %tmp1008, 3
+  %tmp1034 = and i64 %tmp1033, 1
+  %tmp1035 = icmp eq i64 %tmp1034, 0
+  %tmp1036 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 0, i64 %tmp1033
+  %tmp1037 = load i32, i32* %tmp1036, align 4
+  %tmp1038 = sub i32 0, %tmp1037
+  %tmp1039 = select i1 %tmp1035, i32 %tmp1037, i32 %tmp1038
+  %tmp1040 = add i32 %tmp1039, %tmp1032
+  %tmp1041 = add nuw nsw i64 %tmp1008, 4
+  %tmp1042 = and i64 %tmp1041, 1
+  %tmp1043 = icmp eq i64 %tmp1042, 0
+  %tmp1044 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %tmp18, i64 0, i64 0, i64 %tmp1041
+  %tmp1045 = load i32, i32* %tmp1044, align 4
+  %tmp1046 = sub i32 0, %tmp1045
+  %tmp1047 = select i1 %tmp1043, i32 %tmp1045, i32 %tmp1046
+  %tmp1048 = add i32 %tmp1047, %tmp1040
+  %tmp1049 = add nuw nsw i64 %tmp1008, 5
+  %tmp1050 = icmp eq i64 %tmp1049, 10000
+  br i1 %tmp1050, label %bb1051, label %bb1007
+
+bb1051:                                           ; preds = %bb1007
+  %tmp1052 = add i32 %tmp382, %tmp385
+  %tmp1053 = add i32 %tmp1052, %tmp520
+  %tmp1054 = add i32 %tmp1053, %tmp564
+  %tmp1055 = sub i32 %tmp1054, %tmp608
+  %tmp1056 = add i32 %tmp1055, %tmp652
+  %tmp1057 = sub i32 %tmp1056, %tmp696
+  %tmp1058 = add i32 %tmp1057, %tmp740
+  %tmp1059 = sub i32 %tmp1058, %tmp784
+  %tmp1060 = add i32 %tmp1059, %tmp828
+  %tmp1061 = sub i32 %tmp1060, %tmp872
+  %tmp1062 = add i32 %tmp1061, %tmp916
+  %tmp1063 = sub i32 %tmp1062, %tmp960
+  %tmp1064 = add i32 %tmp1063, %tmp1004
+  %tmp1065 = sub i32 %tmp1064, %tmp1048
+  call void @llvm.lifetime.end.p0i8(i64 40000, i8* nonnull %tmp31) #4
+  call void @llvm.lifetime.end.p0i8(i64 40000, i8* nonnull %tmp30) #4
+  call void @llvm.lifetime.end.p0i8(i64 40000, i8* nonnull %tmp29) #4
+  call void @llvm.lifetime.end.p0i8(i64 400, i8* nonnull %tmp28) #4
+  call void @llvm.lifetime.end.p0i8(i64 40000, i8* nonnull %tmp27) #4
+  call void @llvm.lifetime.end.p0i8(i64 400, i8* nonnull %tmp26) #4
+  call void @llvm.lifetime.end.p0i8(i64 400, i8* nonnull %tmp25) #4
+  call void @llvm.lifetime.end.p0i8(i64 40000, i8* nonnull %tmp24) #4
+  call void @llvm.lifetime.end.p0i8(i64 400, i8* nonnull %tmp23) #4
+  call void @llvm.lifetime.end.p0i8(i64 40000, i8* nonnull %tmp22) #4
+  call void @llvm.lifetime.end.p0i8(i64 400, i8* nonnull %tmp21) #4
+  call void @llvm.lifetime.end.p0i8(i64 400, i8* nonnull %tmp20) #4
+  call void @llvm.lifetime.end.p0i8(i64 400, i8* nonnull %tmp19) #4
+  ret i32 %tmp1065
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/lsr-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/lsr-overflow.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/lsr-overflow.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/lsr-overflow.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -lsr-complexity-limit=50 -loop-reduce -S %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @overflow1(i64 %a) {
+; CHECK-LABEL: @overflow1(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[A:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[A]], -9223372036854775808
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[BB1]] ], [ [[TMP1]], [[BB:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB1]] ], [ [[TMP0]], [[BB]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[LSR_IV1]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = and i1 [[TMP4]], true
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 1
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], 1
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB1]], label [[BB7:%.*]]
+; CHECK:       bb7:
+; CHECK-NEXT:    [[TMP9:%.*]] = and i64 [[LSR_IV_NEXT]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 0
+; CHECK-NEXT:    unreachable
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp = phi i64 [ %a, %bb ], [ %tmp6, %bb1 ]
+  %tmp4 = icmp ne i64 %tmp, -9223372036854775808
+  %tmp5 = and i1 %tmp4, 1
+  %tmp6 = add i64 %tmp, 1
+  br i1 %tmp5, label %bb1, label %bb7
+
+bb7:                                              ; preds = %bb1
+  %tmp9 = and i64 %tmp, 1
+  %tmp10 = icmp eq i64 %tmp9, 0
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/missing-phi-operand-update.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,101 @@
+; PR41445: This test checks the case where LSR splits a critical edge
+; and the phi node has other pending fixup operands.
+
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; We have %indvars.iv.lcssa phi node where 4 input operands
+; need to be rewritten: %tmp1, %tmp2, %tmp3, %tmp4.
+; When we try to rewrite %tmp1, we first split the critical edge.
+; All the other PHI inputs besides %tmp1 go to a new phi node.
+; This test checks that LSR is still able to rewrite %tmp2, %tmp3, %tmp4.
+define i32 @foo(i32* %A, i32 %t) {
+entry:
+  br label %loop.32
+
+loop.exit:                                        ; preds = %then.8.1, %then.8, %ifmerge.42, %ifmerge.38, %ifmerge.34, %then.34
+  %indvars.iv.lcssa = phi i64 [ 48, %then.8 ], [ 49, %then.8.1 ], [ %tmp4, %ifmerge.42 ], [ %tmp3, %ifmerge.38 ], [ %tmp2, %ifmerge.34 ], [ %tmp1, %then.34 ]
+  %tmp = trunc i64 %indvars.iv.lcssa to i32
+  br label %for.end
+
+for.end:                                          ; preds = %then.8.1, %ifmerge.8, %loop.exit
+  %i.0.lcssa = phi i32 [ %tmp, %loop.exit ], [ 50, %then.8.1 ], [ 50, %ifmerge.8 ]
+  ret i32 %i.0.lcssa
+
+; The shl instruction will be dead-code eliminated once all its uses are rewritten.
+; CHECK-LABEL: loop.32:
+; CHECK-NOT: shl
+loop.32:                                          ; preds = %ifmerge.46, %entry
+  %i1.i64.0 = phi i64 [ 0, %entry ], [ %nextivloop.32, %ifmerge.46 ]
+  %tmp1 = shl i64 %i1.i64.0, 2
+  %tmp2 = or i64 %tmp1, 1
+  %arrayIdx = getelementptr inbounds i32, i32* %A, i64 %tmp2
+  %gepload = load i32, i32* %arrayIdx, align 4
+  %cmp.34 = icmp sgt i32 %gepload, %t
+  br i1 %cmp.34, label %then.34, label %ifmerge.34
+
+; CHECK-LABEL: then.34:
+then.34:                                          ; preds = %loop.32
+  %arrayIdx17 = getelementptr inbounds i32, i32* %A, i64 %tmp1
+  %gepload18 = load i32, i32* %arrayIdx17, align 4
+  %cmp.35 = icmp slt i32 %gepload18, %t
+  br i1 %cmp.35, label %loop.exit, label %ifmerge.34
+
+ifmerge.34:                                       ; preds = %then.34, %loop.32
+  %tmp3 = or i64 %tmp1, 2
+  %arrayIdx19 = getelementptr inbounds i32, i32* %A, i64 %tmp3
+  %gepload20 = load i32, i32* %arrayIdx19, align 4
+  %cmp.38 = icmp sgt i32 %gepload20, %t
+  %cmp.39 = icmp slt i32 %gepload, %t
+  %or.cond = and i1 %cmp.38, %cmp.39
+  br i1 %or.cond, label %loop.exit, label %ifmerge.38
+
+ifmerge.38:                                       ; preds = %ifmerge.34
+  %tmp4 = or i64 %tmp1, 3
+  %arrayIdx23 = getelementptr inbounds i32, i32* %A, i64 %tmp4
+  %gepload24 = load i32, i32* %arrayIdx23, align 4
+  %cmp.42 = icmp sgt i32 %gepload24, %t
+  %cmp.43 = icmp slt i32 %gepload20, %t
+  %or.cond55 = and i1 %cmp.42, %cmp.43
+  br i1 %or.cond55, label %loop.exit, label %ifmerge.42
+
+ifmerge.42:                                       ; preds = %ifmerge.38
+  %tmp5 = add i64 %tmp1, 4
+  %arrayIdx27 = getelementptr inbounds i32, i32* %A, i64 %tmp5
+  %gepload28 = load i32, i32* %arrayIdx27, align 4
+  %cmp.46 = icmp sgt i32 %gepload28, %t
+  %cmp.47 = icmp slt i32 %gepload24, %t
+  %or.cond56 = and i1 %cmp.46, %cmp.47
+  br i1 %or.cond56, label %loop.exit, label %ifmerge.46
+
+ifmerge.46:                                       ; preds = %ifmerge.42
+  %nextivloop.32 = add nuw nsw i64 %i1.i64.0, 1
+  %condloop.32 = icmp ult i64 %nextivloop.32, 12
+  br i1 %condloop.32, label %loop.32, label %loop.25
+
+loop.25:                                          ; preds = %ifmerge.46
+  %arrayIdx31 = getelementptr inbounds i32, i32* %A, i64 49
+  %gepload32 = load i32, i32* %arrayIdx31, align 4
+  %cmp.8 = icmp sgt i32 %gepload32, %t
+  br i1 %cmp.8, label %then.8, label %ifmerge.8
+
+then.8:                                           ; preds = %loop.25
+  %arrayIdx33 = getelementptr inbounds i32, i32* %A, i64 48
+  %gepload34 = load i32, i32* %arrayIdx33, align 4
+  %cmp.15 = icmp slt i32 %gepload34, %t
+  br i1 %cmp.15, label %loop.exit, label %ifmerge.8
+
+ifmerge.8:                                        ; preds = %then.8, %loop.25
+  %arrayIdx31.1 = getelementptr inbounds i32, i32* %A, i64 50
+  %gepload32.1 = load i32, i32* %arrayIdx31.1, align 4
+  %cmp.8.1 = icmp sgt i32 %gepload32.1, %t
+  br i1 %cmp.8.1, label %then.8.1, label %for.end
+
+then.8.1:                                         ; preds = %ifmerge.8
+  %arrayIdx33.1 = getelementptr inbounds i32, i32* %A, i64 49
+  %gepload34.1 = load i32, i32* %arrayIdx33.1, align 4
+  %cmp.15.1 = icmp slt i32 %gepload34.1, %t
+  br i1 %cmp.15.1, label %loop.exit, label %for.end
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/negative-scale.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/negative-scale.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/negative-scale.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/negative-scale.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+declare void @foo(i8)
+
+define void @not_addressing_mode(i8* %input, i64 %n) {
+; CHECK-LABEL: @not_addressing_mode(
+entry:
+  br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: %lsr.iv = phi i8* [ {{%[^,]+}}, %loop ], [ %input, %entry ]
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i64 %i, 1
+  %j = mul i64 %i, -2
+  ; (%input - 2 * %i) is not foldable. Worth another indvar.
+  %p = getelementptr i8, i8* %input, i64 %j
+  %v = load i8, i8* %p
+; CHECK: %v = load i8, i8* %lsr.iv
+  call void @foo(i8 %v)
+  %exitcond = icmp slt i64 %i.next, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/nested-reduce.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/nested-reduce.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/nested-reduce.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/nested-reduce.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-reduce -S | not grep mul
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+; Make sure we don't get a multiply by 6 in this loop.
+
+define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) {
+entry:
+	%tmp.5 = icmp sgt i32 %C, 0		; <i1> [#uses=1]
+	%tmp.25 = and i32 %A, 1		; <i32> [#uses=1]
+	br label %loopentry.1
+loopentry.1:		; preds = %loopexit.1, %entry
+	%indvar20 = phi i32 [ 0, %entry ], [ %indvar.next21, %loopexit.1 ]		; <i32> [#uses=2]
+	%k.1 = phi i32 [ 0, %entry ], [ %k.1.3, %loopexit.1 ]		; <i32> [#uses=2]
+	br i1 %tmp.5, label %no_exit.1.preheader, label %loopexit.1
+no_exit.1.preheader:		; preds = %loopentry.1
+	%i.0.0 = bitcast i32 %indvar20 to i32		; <i32> [#uses=1]
+	%tmp.9 = mul i32 %i.0.0, 6		; <i32> [#uses=1]
+	br label %no_exit.1.outer
+no_exit.1.outer:		; preds = %cond_true, %no_exit.1.preheader
+	%k.1.2.ph = phi i32 [ %k.1, %no_exit.1.preheader ], [ %k.09, %cond_true ]		; <i32> [#uses=2]
+	%j.1.2.ph = phi i32 [ 0, %no_exit.1.preheader ], [ %inc.1, %cond_true ]		; <i32> [#uses=1]
+	br label %no_exit.1
+no_exit.1:		; preds = %cond_continue, %no_exit.1.outer
+	%indvar.ui = phi i32 [ 0, %no_exit.1.outer ], [ %indvar.next, %cond_continue ]		; <i32> [#uses=2]
+	%indvar = bitcast i32 %indvar.ui to i32		; <i32> [#uses=1]
+	%j.1.2 = add i32 %indvar, %j.1.2.ph		; <i32> [#uses=2]
+	%tmp.11 = add i32 %j.1.2, %tmp.9		; <i32> [#uses=1]
+	%tmp.12 = trunc i32 %tmp.11 to i8		; <i8> [#uses=1]
+	%shift.upgrd.1 = zext i8 %tmp.12 to i32		; <i32> [#uses=1]
+	%tmp.13 = shl i32 %D, %shift.upgrd.1		; <i32> [#uses=2]
+	%tmp.15 = icmp eq i32 %tmp.13, %B		; <i1> [#uses=1]
+	%inc.1 = add i32 %j.1.2, 1		; <i32> [#uses=3]
+	br i1 %tmp.15, label %cond_true, label %cond_continue
+cond_true:		; preds = %no_exit.1
+	%tmp.26 = and i32 %tmp.25, %tmp.13		; <i32> [#uses=1]
+	%k.09 = add i32 %tmp.26, %k.1.2.ph		; <i32> [#uses=2]
+	%tmp.517 = icmp slt i32 %inc.1, %C		; <i1> [#uses=1]
+	br i1 %tmp.517, label %no_exit.1.outer, label %loopexit.1
+cond_continue:		; preds = %no_exit.1
+	%tmp.519 = icmp slt i32 %inc.1, %C		; <i1> [#uses=1]
+	%indvar.next = add i32 %indvar.ui, 1		; <i32> [#uses=1]
+	br i1 %tmp.519, label %no_exit.1, label %loopexit.1
+loopexit.1:		; preds = %cond_continue, %cond_true, %loopentry.1
+	%k.1.3 = phi i32 [ %k.1, %loopentry.1 ], [ %k.09, %cond_true ], [ %k.1.2.ph, %cond_continue ]		; <i32> [#uses=2]
+	%indvar.next21 = add i32 %indvar20, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next21, 4		; <i1> [#uses=1]
+	br i1 %exitcond, label %loopexit.0, label %loopentry.1
+loopexit.0:		; preds = %loopexit.1
+	ret i32 %k.1.3
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/nonintegral.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/nonintegral.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/nonintegral.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/nonintegral.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+; Address Space 10 is non-integral. The optimizer is not allowed to use
+; ptrtoint/inttoptr instructions. Make sure that this doesn't happen
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+; How exactly SCEV chooses to materialize isn't all that important, as
+; long as it doesn't try to round-trip through integers. As of this writing,
+; it emits a byte-wise gep, which is fine.
+; CHECK: getelementptr i64, i64 addrspace(10)* {{.*}}, i64 {{.*}}
+top:
+  br label %L86
+
+L86:                                              ; preds = %L86, %top
+  %i.0 = phi i64 [ 0, %top ], [ %tmp, %L86 ]
+  %tmp = add i64 %i.0, 1
+  br i1 undef, label %L86, label %if29
+
+if29:                                             ; preds = %L86
+  %tmp1 = shl i64 %tmp, 1
+  %tmp2 = add i64 %tmp1, -2
+  br label %if31
+
+if31:                                             ; preds = %if38, %if29
+  %"#temp#1.sroa.0.022" = phi i64 [ 0, %if29 ], [ %tmp3, %if38 ]
+  br label %L119
+
+L119:                                             ; preds = %L119, %if31
+  %i5.0 = phi i64 [ %"#temp#1.sroa.0.022", %if31 ], [ %tmp3, %L119 ]
+  %tmp3 = add i64 %i5.0, 1
+  br i1 undef, label %L119, label %if38
+
+if38:                                             ; preds = %L119
+  %tmp4 = add i64 %tmp2, %i5.0
+  %tmp5 = getelementptr i64, i64 addrspace(10)* %arg, i64 %tmp4
+  %tmp6 = load i64, i64 addrspace(10)* %tmp5
+  br i1 undef, label %done, label %if31
+
+done:                                             ; preds = %if38
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/nonlinear-postinc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/nonlinear-postinc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/nonlinear-postinc.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/nonlinear-postinc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt < %s -loop-reduce
+; PR6453
+
+target datalayout = "e-p:64:64:64"
+
+define void @_ZNK15PolynomialSpaceILi3EE13compute_indexEjRA3_j() nounwind {
+entry:
+  br label %bb6
+
+bb6:
+  %t4 = phi i32 [ 0, %entry ], [ %t3, %bb5 ]
+  %t16 = sub i32 undef, %t4
+  %t25 = sub i32 undef, %t4
+  %t26 = add i32 undef, %t25
+  br label %bb4
+
+bb4:
+  %t2 = phi i32 [ %t1, %bb3 ], [ 0, %bb6 ]
+  %t17 = mul i32 %t2, %t16
+  %t18 = zext i32 %t2 to i33
+  %t19 = add i32 %t2, -1
+  %t20 = zext i32 %t19 to i33
+  %t21 = mul i33 %t18, %t20
+  %t22 = lshr i33 %t21, 1
+  %t23 = trunc i33 %t22 to i32
+  %t24 = sub i32 %t17, %t23
+  %t27 = add i32 %t24, %t26
+  br i1 false, label %bb1, label %bb5
+
+bb1:
+  %t = icmp ugt i32 %t27, undef
+  br i1 %t, label %bb2, label %bb3
+
+bb3:
+  %t1 = add i32 %t2, 1
+  br label %bb4
+
+bb5:
+  %t3 = add i32 %t4, 1
+  br label %bb6
+
+bb2:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; Check that this test makes INDVAR and related stuff dead, because P[indvar]
+; gets reduced, making INDVAR dead.
+
+; RUN: opt < %s -loop-reduce -S | not grep INDVAR
+
+target datalayout = "e-p:32:32:32-n32"
+
+declare i1 @pred()
+
+declare i32 @getidx()
+
+define void @test([10000 x i32]* %P) {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ]		; <i32> [#uses=2]
+	%idx = call i32 @getidx( )		; <i32> [#uses=1]
+	%STRRED = getelementptr [10000 x i32], [10000 x i32]* %P, i32 %INDVAR, i32 %idx		; <i32*> [#uses=1]
+	store i32 0, i32* %STRRED
+	%INDVAR2 = add i32 %INDVAR, 1		; <i32> [#uses=1]
+	%cond = call i1 @pred( )		; <i1> [#uses=1]
+	br i1 %cond, label %Loop, label %Out
+Out:		; preds = %Loop
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-reduce -disable-output
+; LSR should not crash on this.
+
+define fastcc void @loadloop() {
+entry:
+	switch i8 0, label %shortcirc_next [
+		 i8 32, label %loopexit.2
+		 i8 59, label %loopexit.2
+	]
+shortcirc_next:		; preds = %no_exit.2, %entry
+	%indvar37 = phi i32 [ 0, %entry ], [ %indvar.next38, %no_exit.2 ]		; <i32> [#uses=3]
+	%gep.upgrd.1 = zext i32 %indvar37 to i64		; <i64> [#uses=1]
+	%wp.2.4 = getelementptr i8, i8* null, i64 %gep.upgrd.1		; <i8*> [#uses=1]
+	br i1 false, label %loopexit.2, label %no_exit.2
+no_exit.2:		; preds = %shortcirc_next
+	%wp.2.4.rec = bitcast i32 %indvar37 to i32		; <i32> [#uses=1]
+	%inc.1.rec = add i32 %wp.2.4.rec, 1		; <i32> [#uses=1]
+	%inc.1 = getelementptr i8, i8* null, i32 %inc.1.rec		; <i8*> [#uses=2]
+	%indvar.next38 = add i32 %indvar37, 1		; <i32> [#uses=1]
+	switch i8 0, label %shortcirc_next [
+		 i8 32, label %loopexit.2
+		 i8 59, label %loopexit.2
+	]
+loopexit.2:		; preds = %no_exit.2, %no_exit.2, %shortcirc_next, %entry, %entry
+	%wp.2.7 = phi i8* [ null, %entry ], [ null, %entry ], [ %wp.2.4, %shortcirc_next ], [ %inc.1, %no_exit.2 ], [ %inc.1, %no_exit.2 ]		; <i8*> [#uses=0]
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,92 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; PR9939
+
+; LSR should properly handle the post-inc offset when folding the
+; non-IV operand of an icmp into the IV.
+
+; CHECK:   [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK:   [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1
+; CHECK:   [[r3:%[a-z0-9\.]+]] = bitcast i64 [[r2]] to i64
+; CHECK: for.body.lr.ph:
+; CHECK:   [[r4:%[a-z0-9]+]] = shl i64 [[r3]], 1
+; CHECK:   br label %for.body
+; CHECK: for.body:
+; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r4]], %for.body.lr.ph ]
+; CHECK:   %lsr.iv.next = add i64 %lsr.iv2, -2
+; CHECK:   %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
+; CHECK:   %cmp27 = icmp eq i16* %lsr.iv.next3, null
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.Vector2 = type { i16*, [64 x i16], i32 }  ; field 0 is read as %mBegin and field 2 as %mLength by the function below
+
+@.str = private unnamed_addr constant [37 x i8] c"0123456789abcdefghijklmnopqrstuvwxyz\00"  ; digit characters, indexed by %idxprom in %do.body
+
+define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 %radix, %struct.Vector2* nocapture %result) nounwind noinline {  ; PR9939 input; %radix is never referenced in the body
+entry:
+  %buffer = alloca [33 x i16], align 16
+  %add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33  ; one-past-the-end of %buffer
+  %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
+  %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64  ; same source pointer as %sub.ptr.lhs.cast
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  %0 = phi i64 [ %indvar.next44, %do.body ], [ 0, %entry ]
+  %i.addr.0 = phi i32 [ %div, %do.body ], [ %i, %entry ]
+  %tmp51 = sub i64 32, %0  ; store index counts down from 32 as %0 counts up
+  %incdec.ptr = getelementptr [33 x i16], [33 x i16]* %buffer, i64 0, i64 %tmp51
+  %rem = urem i32 %i.addr.0, 10
+  %div = udiv i32 %i.addr.0, 10
+  %idxprom = zext i32 %rem to i64
+  %arrayidx = getelementptr inbounds [37 x i8], [37 x i8]* @.str, i64 0, i64 %idxprom
+  %tmp5 = load i8, i8* %arrayidx, align 1  ; character for the current remainder
+  %conv = sext i8 %tmp5 to i16
+  store i16 %conv, i16* %incdec.ptr, align 2
+  %1 = icmp ugt i32 %i.addr.0, 9  ; loop again while the pre-division value exceeded 9
+  %indvar.next44 = add i64 %0, 1
+  br i1 %1, label %do.body, label %do.end
+
+do.end:                                           ; preds = %do.body
+  %xap.0 = inttoptr i64 %0 to i1*
+  %cap.0 = ptrtoint i1* %xap.0 to i64  ; %cap.0 has no uses in this function
+  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1
+  %conv11 = trunc i64 %sub.ptr.div39 to i32
+  %mLength = getelementptr inbounds %struct.Vector2, %struct.Vector2* %result, i64 0, i32 2
+  %idx.ext21 = bitcast i64 %sub.ptr.div39 to i64  ; no-op i64-to-i64 bitcast
+  %incdec.ptr.sum = add i64 %idx.ext21, -1
+  %cp.0.sum = sub i64 %incdec.ptr.sum, %0
+  %add.ptr22 = getelementptr [33 x i16], [33 x i16]* %buffer, i64 1, i64 %cp.0.sum  ; loop-invariant end pointer compared against in %for.body
+  %cmp2740 = icmp eq i64 %idx.ext21, 0
+  br i1 %cmp2740, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %do.end
+  %tmp16 = load i32, i32* %mLength, align 4
+  %mBegin = getelementptr inbounds %struct.Vector2, %struct.Vector2* %result, i64 0, i32 0
+  %tmp14 = load i16*, i16** %mBegin, align 8
+  %tmp48 = zext i32 %tmp16 to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvar = phi i64 [ 0, %for.body.lr.ph ], [ %indvar.next, %for.body ]
+  %tmp46 = add i64 %tmp51, %indvar
+  %p.042 = getelementptr [33 x i16], [33 x i16]* %buffer, i64 0, i64 %tmp46  ; source element in %buffer
+  %tmp47 = sub i64 %indvar, %0
+  %incdec.ptr32 = getelementptr [33 x i16], [33 x i16]* %buffer, i64 1, i64 %tmp47
+  %tmp49 = add i64 %tmp48, %indvar
+  %dst.041 = getelementptr i16, i16* %tmp14, i64 %tmp49  ; destination element at offset %tmp48 + %indvar from %tmp14
+  %tmp29 = load i16, i16* %p.042, align 2
+  store i16 %tmp29, i16* %dst.041, align 2
+  %cmp27 = icmp eq i16* %incdec.ptr32, %add.ptr22  ; pointer exit test; the expectations at the top of the test want it rewritten as a compare against null
+  %indvar.next = add i64 %indvar, 1
+  br i1 %cmp27, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %do.end
+  %tmp38 = load i32, i32* %mLength, align 4
+  %add = add i32 %tmp38, %conv11
+  store i32 %add, i32* %mLength, align 4
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-optsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-optsize.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-optsize.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-optsize.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"  ; little-endian, 32-bit pointers, i32 is the only native integer width
+target triple = "thumbv7m-arm-none-eabi"
+
+; Check that the IV updates (incdec.ptr{,1,2}) are kept in the latch block
+; and not moved to the header/exiting block. Inserting them in the header
+; doubles register pressure and adds moves.
+
+; CHECK-LABEL: @f
+; CHECK: while.cond:
+; CHECK: icmp sgt i32 %n.addr.0, 0
+; CHECK: while.body:
+; CHECK: incdec.ptr =
+; CHECK: incdec.ptr1 =
+; CHECK: incdec.ptr2 =
+; CHECK: dec = 
+define void @f(float* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c, i32 %n) {  ; three pointer IVs (%a, %b, %c) plus the down-counter %n
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %entry
+  %a.addr.0 = phi float* [ %a, %entry ], [ %incdec.ptr, %while.body ]
+  %b.addr.0 = phi float* [ %b, %entry ], [ %incdec.ptr1, %while.body ]
+  %c.addr.0 = phi float* [ %c, %entry ], [ %incdec.ptr2, %while.body ]
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %while.body ]
+  %cmp = icmp sgt i32 %n.addr.0, 0  ; header is also the exiting block: leave once the counter is no longer positive
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+  %incdec.ptr = getelementptr inbounds float, float* %a.addr.0, i32 1  ; increment is computed before the load that still uses the old pointer
+  %tmp = load float, float* %a.addr.0, align 4
+  %incdec.ptr1 = getelementptr inbounds float, float* %b.addr.0, i32 1
+  %tmp1 = load float, float* %b.addr.0, align 4
+  %add = fadd float %tmp, %tmp1
+  %incdec.ptr2 = getelementptr inbounds float, float* %c.addr.0, i32 1
+  store float %add, float* %c.addr.0, align 4  ; *c = *a + *b
+  %dec = add nsw i32 %n.addr.0, -1
+  br label %while.cond  ; back edge: %while.body is the latch block
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr12018.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr12018.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr12018.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr12018.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt < %s -loop-reduce -disable-output
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+%struct.nsTArray = type { i8 }  ; single-byte element type indexed by the loop IV
+%struct.nsTArrayHeader = type { i32 }  ; header type stepped over by %add.ptr.i
+
+define void @_Z6foobarR8nsTArray(%struct.nsTArray* %aValues, i32 %foo, %struct.nsTArrayHeader* %bar) nounwind {  ; %aValues is used only by the debug intrinsic, %bar not at all; %foo is the loop bound
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %_ZN8nsTArray9ElementAtEi.exit, %entry
+  %i.06 = phi i32 [ %add, %_ZN8nsTArray9ElementAtEi.exit ], [ 0, %entry ]
+  %call.i = call %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev() nounwind  ; base pointer is produced by a call inside the loop
+  %add.ptr.i = getelementptr inbounds %struct.nsTArrayHeader, %struct.nsTArrayHeader* %call.i, i32 1
+  %tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
+  %arrayidx = getelementptr inbounds %struct.nsTArray, %struct.nsTArray* %tmp, i32 %i.06  ; indexed by the pre-increment IV
+  %add = add nsw i32 %i.06, 1
+  call void @llvm.dbg.value(metadata %struct.nsTArray* %aValues, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !1)  ; debug intrinsic sits between the IV increment and the terminator
+  br label %_ZN8nsTArray9ElementAtEi.exit
+
+_ZN8nsTArray9ElementAtEi.exit:                    ; preds = %for.body
+  %arrayidx.i = getelementptr inbounds %struct.nsTArray, %struct.nsTArray* %tmp, i32 %add  ; indexed by the post-increment IV
+  call void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray* %arrayidx, %struct.nsTArray* %arrayidx.i) nounwind
+  %cmp = icmp slt i32 %add, %foo
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %_ZN8nsTArray9ElementAtEi.exit
+  ret void
+}
+
+declare void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray*, %struct.nsTArray*)  ; receives the pre- and post-increment element pointers
+
+declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()  ; returns the header pointer used as the array base
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone
+
+!0 = !DILocalVariable(scope: !1)  ; variable referenced by the llvm.dbg.value call
+!1 = distinct !DISubprogram()  ; scope shared by !0 and the inline !DILocation

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr12048.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr12048.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr12048.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr12048.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce -disable-output
+
+define void @resolve_name() nounwind uwtable ssp {  ; crash reproducer; the unnamed entry block (%0) branches straight to the outer preheader
+  br label %while.cond40.preheader
+while.cond132.while.cond.loopexit_crit_edge:      ; preds = %while.body139
+  br label %while.cond40.preheader
+while.cond40.preheader:                           ; preds = %while.cond132.preheader, %while.cond132.while.cond.loopexit_crit_edge, %0
+  br label %while.cond40
+while.cond40:                                     ; preds = %while.body51, %while.cond40.preheader
+  %indvars.iv194 = phi i8* [ null, %while.cond40.preheader ], [ %scevgep, %while.body51 ]
+  %tmp.1 = phi i8* [ undef, %while.cond40.preheader ], [ %incdec.ptr, %while.body51 ]
+  switch i8 undef, label %while.body51 [
+    i8 0, label %if.then59
+  ]
+while.body51:                                     ; preds = %while.cond40
+  %incdec.ptr = getelementptr inbounds i8, i8* %tmp.1, i64 1
+  %scevgep = getelementptr i8, i8* %indvars.iv194, i64 1
+  br label %while.cond40
+if.then59:                                        ; preds = %while.cond40
+  br i1 undef, label %if.then64, label %if.end113
+if.then64:                                        ; preds = %if.then59
+  %incdec.ptr88.tmp.2 = select i1 undef, i8* undef, i8* undef
+  br label %if.end113
+if.end113:                                        ; preds = %if.then64, %if.then59
+  %tmp.4 = phi i8* [ %incdec.ptr88.tmp.2, %if.then64 ], [ undef, %if.then59 ]
+  %tmp.4195 = ptrtoint i8* %tmp.4 to i64
+  br  label %while.cond132.preheader
+while.cond132.preheader:                          ; preds = %if.end113
+  %cmp133173 = icmp eq i8* %tmp.1, %tmp.4  ; uses the inner-loop IV %tmp.1 after that loop has exited
+  br i1 %cmp133173, label %while.cond40.preheader, label %while.body139.lr.ph
+while.body139.lr.ph:                              ; preds = %while.cond132.preheader
+  %scevgep198 = getelementptr i8, i8* %indvars.iv194, i64 0
+  %scevgep198199 = ptrtoint i8* %scevgep198 to i64
+  br label %while.body139
+while.body139:                                    ; preds = %while.body139, %while.body139.lr.ph
+  %start_of_var.0177 = phi i8* [ %tmp.1, %while.body139.lr.ph ], [ null, %while.body139 ]
+  br i1 undef, label %while.cond132.while.cond.loopexit_crit_edge, label %while.body139
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr12691.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr12691.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr12691.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr12691.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+@d = common global i32 0, align 4  ; loaded twice in %for.end of @fn2
+
+define void @fn2(i32 %x) nounwind uwtable {  ; %x never changes, so %for.cond either exits on its first test or loops forever
+entry:
+  br label %for.cond
+
+for.cond:
+  %g.0 = phi i32 [ 0, %entry ], [ %dec, %for.cond ]
+  %tobool = icmp eq i32 %x, 0
+  %dec = add nsw i32 %g.0, -1
+  br i1 %tobool, label %for.cond, label %for.end
+
+for.end:
+; CHECK:  %tmp1 = load i32, i32* @d, align 4
+; CHECK-NEXT:  %tmp2 = load i32, i32* @d, align 4
+; CHECK-NEXT:  %0 = sub i32 %tmp1, %tmp2
+
+  %tmp1 = load i32, i32* @d, align 4
+  %add = add nsw i32 %tmp1, %g.0  ; use of the loop IV %g.0 outside the loop
+  %tmp2 = load i32, i32* @d, align 4
+  %tobool26 = icmp eq i32 %x, 0
+  br i1 %tobool26, label %for.end5, label %for.body.lr.ph
+
+for.body.lr.ph:
+  %tobool3 = icmp ne i32 %tmp2, %add  ; the compare result itself has no uses
+  br label %for.end5
+
+for.end5:
+  ret void
+}
+
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr18165.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr18165.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr18165.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr18165.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,88 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't reuse IV if the resultant offset is not valid for the operand type.
+; CHECK-NOT: trunc i32 %.ph to i8
+
+%struct.anon = type { i32, i32, i32 }
+
+@c = global i32 1, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1  ; printf format string used at the end of @main
+@b = common global i32 0, align 4
+@a = common global %struct.anon zeroinitializer, align 4
+@e = common global %struct.anon zeroinitializer, align 4
+@d = common global i32 0, align 4
+@f = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i32 0, align 4
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {  ; loop carries an i32 IV (%.ph) and an i8 IV (%p.addr.16.i.ph) side by side
+entry:
+  %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @a, i64 0, i32 0), align 4, !tbaa !1
+  %tobool7.i = icmp eq i32 %0, 0
+  %.promoted.i = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
+  %f.promoted.i = load i32, i32* @f, align 4, !tbaa !7
+  br label %for.body6.i.outer
+
+for.body6.i.outer:                                ; preds = %entry, %lor.end.i
+  %.ph = phi i32 [ %add.i, %lor.end.i ], [ 0, %entry ]  ; i32 IV: starts at 0, +2 per iteration (%add.i)
+  %or1512.i.ph = phi i32 [ %or15.i, %lor.end.i ], [ %f.promoted.i, %entry ]
+  %or1410.i.ph = phi i32 [ %or14.i, %lor.end.i ], [ %.promoted.i, %entry ]
+  %p.addr.16.i.ph = phi i8 [ %inc10.i, %lor.end.i ], [ -128, %entry ]  ; i8 IV: starts at -128, +1 per iteration (%inc10.i)
+  br i1 %tobool7.i, label %if.end9.i, label %lbl.loopexit.i
+
+lbl.loopexit.i:                                   ; preds = %for.body6.i.outer, %lbl.loopexit.i
+  br label %lbl.loopexit.i  ; self-loop with no exit edge
+
+if.end9.i:                                        ; preds = %for.body6.i.outer
+  %inc10.i = add i8 %p.addr.16.i.ph, 1
+  %tobool12.i = icmp eq i8 %p.addr.16.i.ph, 0  ; i8 compare; the test forbids LSR from producing "trunc i32 %.ph to i8"
+  br i1 %tobool12.i, label %lor.rhs.i, label %lor.end.i
+
+lor.rhs.i:                                        ; preds = %if.end9.i
+  %1 = load i32, i32* @b, align 4, !tbaa !7
+  %dec.i = add nsw i32 %1, -1
+  store i32 %dec.i, i32* @b, align 4, !tbaa !7
+  %tobool13.i = icmp ne i32 %1, 0
+  br label %lor.end.i
+
+lor.end.i:                                        ; preds = %lor.rhs.i, %if.end9.i
+  %2 = phi i1 [ true, %if.end9.i ], [ %tobool13.i, %lor.rhs.i ]
+  %lor.ext.i = zext i1 %2 to i32
+  %or14.i = or i32 %lor.ext.i, %or1410.i.ph
+  %or15.i = or i32 %or14.i, %or1512.i.ph
+  %add.i = add nsw i32 %.ph, 2
+  %cmp.i = icmp slt i32 %add.i, 21  ; latch test on the i32 IV
+  br i1 %cmp.i, label %for.body6.i.outer, label %fn1.exit
+
+fn1.exit:                                         ; preds = %lor.end.i
+  store i32 0, i32* @g, align 4, !tbaa !7
+  store i32 %or14.i, i32* getelementptr inbounds (%struct.anon, %struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
+  store i32 %or15.i, i32* @f, align 4, !tbaa !7
+  store i32 %add.i, i32* getelementptr inbounds (%struct.anon, %struct.anon* @e, i64 0, i32 1), align 4, !tbaa !8  ; final value of the i32 IV is stored after the loop
+  store i32 0, i32* @h, align 4, !tbaa !7
+  %3 = load i32, i32* @b, align 4, !tbaa !7
+  %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
+  ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }  ; attached to the printf call site in @main
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.5 "}
+!1 = !{!2, !3, i64 0}  ; TBAA access tag: base !2, access type !3 ("int"), offset 0
+!2 = !{!"", !3, i64 0, !3, i64 4, !3, i64 8}  ; unnamed struct type with three "int" members at offsets 0, 4 and 8
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}  ; TBAA root node
+!6 = !{!2, !3, i64 8}  ; access tag for the member at offset 8
+!7 = !{!3, !3, i64 0}  ; access tag for a plain "int" access
+!8 = !{!2, !3, i64 4}  ; access tag for the member at offset 4

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr2537.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr2537.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr2537.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr2537.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt < %s -loop-reduce -disable-output
+; PR 2537
+
+define void @a() {  ; PR 2537 reproducer: i128 induction variables whose step and bound need more than 64 bits
+entry:
+        br label %dobody
+
+dobody:         ; preds = %dobody, %entry
+        %y.0 = phi i128 [ 0, %entry ], [ %add, %dobody ]
+        %x.0 = phi i128 [ 0, %entry ], [ %add2, %dobody ]
+        %add = add i128 %y.0, shl (i128 1, i128 64)  ; step of 2^64
+        %add2 = add i128 %x.0, shl (i128 1, i128 48)  ; step of 2^48
+        call void @b( i128 %add )
+        %cmp = icmp ult i128 %add2, shl (i128 1, i128 64)  ; keep looping while %add2 < 2^64
+        br i1 %cmp, label %dobody, label %afterdo
+
+afterdo:                ; preds = %dobody
+        ret void
+}
+
+declare void @b(i128 %add)  ; external callee that receives the i128 value %add on each iteration of @a

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr25541.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr25541.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr25541.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr25541.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,48 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"  ; MSVC target, matching the __CxxFrameHandler3 personality and catchswitch/catchpad used below
+
+define void @f() personality i32 (...)* @__CxxFrameHandler3 {  ; the first loop's pointer IV is consumed inside a catchpad funclet
+entry:
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %for.inc.i, %entry
+  %_First.addr.0.i = phi i32* [ null, %entry ], [ %incdec.ptr.i, %for.inc.i ]
+  invoke void @g()
+          to label %for.inc.i unwind label %catch.dispatch.i
+
+catch.dispatch.i:                                 ; preds = %for.cond.i
+  %cs = catchswitch within none [label %for.cond.1.preheader.i] unwind to caller
+
+for.cond.1.preheader.i:                           ; preds = %catch.dispatch.i
+  %0 = catchpad within %cs [i8* null, i32 64, i8* null]
+  %cmp.i = icmp eq i32* %_First.addr.0.i, null  ; use of the IV outside its loop, inside the funclet
+  br label %for.cond.1.i
+
+for.cond.1.i:                                     ; preds = %for.body.i, %for.cond.1.preheader.i
+  br i1 %cmp.i, label %for.end.i, label %for.body.i  ; condition is computed once in the preheader, so it is invariant in this loop
+
+for.body.i:                                       ; preds = %for.cond.1.i
+  call void @g()
+  br label %for.cond.1.i
+
+for.inc.i:                                        ; preds = %for.cond.i
+  %incdec.ptr.i = getelementptr inbounds i32, i32* %_First.addr.0.i, i64 1  ; advance the pointer IV by one i32
+  br label %for.cond.i
+
+for.end.i:                                        ; preds = %for.cond.1.i
+  catchret from %0 to label %leave
+
+leave:                                            ; preds = %for.end.i
+  ret void
+}
+
+; CHECK-LABEL: define void @f(
+; CHECK: %[[PHI:.*]]  = phi i64 [ %[[IV_NEXT:.*]], {{.*}} ], [ 0, {{.*}} ]
+; CHECK: %[[ITOP:.*]] = inttoptr i64 %[[PHI]] to i32*
+; CHECK: %[[CMP:.*]]  = icmp eq i32* %[[ITOP]], null
+; CHECK: %[[IV_NEXT]] = add i64 %[[PHI]], -4
+
+declare void @g()  ; external callee, invoked in the first loop and called in the funclet loop of @f
+
+declare i32 @__CxxFrameHandler3(...)  ; personality routine named on @f

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr2570.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr2570.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr2570.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr2570.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,287 @@
+; RUN: opt < %s -loop-reduce -S | grep "phi\>" | count 8
+; PR2570
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@g_14 = internal global i32 1		; <i32*> [#uses=1]
+@g_39 = internal global i16 -5		; <i16*> [#uses=2]
+@g_43 = internal global i32 -6		; <i32*> [#uses=3]
+@g_33 = internal global i32 -1269044541		; <i32*> [#uses=1]
+@g_137 = internal global i32 8		; <i32*> [#uses=1]
+@g_82 = internal global i32 -5		; <i32*> [#uses=3]
+@g_91 = internal global i32 1		; <i32*> [#uses=1]
+@g_197 = internal global i32 1		; <i32*> [#uses=4]
+@g_207 = internal global i32 1		; <i32*> [#uses=2]
+@g_222 = internal global i16 4165		; <i16*> [#uses=1]
+@g_247 = internal global i8 -21		; <i8*> [#uses=2]
+@g_260 = internal global i32 1		; <i32*> [#uses=2]
+@g_221 = internal global i16 -17503		; <i16*> [#uses=3]
+@g_267 = internal global i16 1		; <i16*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (i32 (i32, i32, i16, i32, i8, i32)* @func_44 to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i32 @func_44(i32 %p_45, i32 %p_46, i16 zeroext  %p_48, i32 %p_49, i8 zeroext  %p_50, i32 %p_52) nounwind  {		; PR2570 input; unnamed values are numbered implicitly and the trailing "<type>:N" comments record each number
+entry:
+	tail call i32 @func_116( i8 zeroext  2 ) nounwind 		; <i32>:0 [#uses=0]
+	tail call i32 @func_63( i16 signext  2 ) nounwind 		; <i32>:1 [#uses=1]
+	load i16, i16* @g_39, align 2		; <i16>:2 [#uses=1]
+	tail call i32 @func_63( i16 signext  %2 ) nounwind 		; <i32>:3 [#uses=1]
+	trunc i32 %3 to i16		; <i16>:4 [#uses=1]
+	and i16 %4, 1		; <i16>:5 [#uses=1]
+	trunc i32 %p_52 to i8		; <i8>:6 [#uses=1]
+	trunc i32 %1 to i16		; <i16>:7 [#uses=1]
+	tail call i32 @func_74( i16 zeroext  %5, i8 zeroext  %6, i16 zeroext  %7, i16 zeroext  0 ) nounwind 		; <i32>:8 [#uses=0]
+	tail call i32 @func_124( i32 544824386 ) nounwind 		; <i32>:9 [#uses=0]
+	zext i8 %p_50 to i32		; <i32>:10 [#uses=1]
+	load i32, i32* @g_43, align 4		; <i32>:11 [#uses=1]
+	icmp sle i32 %10, %11		; <i1>:12 [#uses=1]
+	zext i1 %12 to i32		; <i32>:13 [#uses=2]
+	load i8, i8* @g_247, align 1		; <i8>:14 [#uses=1]
+	trunc i32 %p_45 to i16		; <i16>:15 [#uses=1]
+	zext i8 %14 to i16		; <i16>:16 [#uses=1]
+	tail call i32 @func_74( i16 zeroext  %15, i8 zeroext  0, i16 zeroext  %16, i16 zeroext  23618 ) nounwind 		; <i32>:17 [#uses=4]
+	icmp slt i32 %17, 0		; <i1>:18 [#uses=1]
+	br i1 %18, label %bb162, label %bb152
+
+bb152:		; preds = %entry
+	lshr i32 2147483647, %13		; <i32>:19 [#uses=1]
+	icmp slt i32 %19, %17		; <i1>:20 [#uses=1]
+	select i1 %20, i32 0, i32 %13		; <i32>:21 [#uses=1]
+	%.348 = shl i32 %17, %21		; <i32> [#uses=1]
+	br label %bb162
+
+bb162:		; preds = %bb152, %entry
+	%.0346 = phi i32 [ %.348, %bb152 ], [ %17, %entry ]		; <i32> [#uses=1]
+	tail call i32 @func_124( i32 1 ) nounwind 		; <i32>:22 [#uses=1]
+	mul i32 %22, %.0346		; <i32>:23 [#uses=1]
+	icmp slt i32 %p_45, 0		; <i1>:24 [#uses=1]
+	icmp ugt i32 %p_45, 31		; <i1>:25 [#uses=1]
+	%or.cond = or i1 %24, %25		; <i1> [#uses=1]
+	br i1 %or.cond, label %bb172, label %bb168
+
+bb168:		; preds = %bb162
+	lshr i32 2147483647, %p_45		; <i32>:26 [#uses=1]
+	shl i32 1392859848, %p_45		; <i32>:27 [#uses=1]
+	icmp slt i32 %26, 1392859848		; <i1>:28 [#uses=1]
+	%.op355 = add i32 %27, 38978		; <i32> [#uses=1]
+	%phitmp = select i1 %28, i32 1392898826, i32 %.op355		; <i32> [#uses=1]
+	br label %bb172
+
+bb172:		; preds = %bb168, %bb162
+	%.0343 = phi i32 [ %phitmp, %bb168 ], [ 1392898826, %bb162 ]		; <i32> [#uses=2]
+	tail call i32 @func_84( i32 1, i16 zeroext  0, i16 zeroext  8 ) nounwind 		; <i32>:29 [#uses=0]
+	icmp eq i32 %.0343, 0		; <i1>:30 [#uses=1]
+	%.0341 = select i1 %30, i32 1, i32 %.0343		; <i32> [#uses=1]
+	urem i32 %23, %.0341		; <i32>:31 [#uses=1]
+	load i32, i32* @g_137, align 4		; <i32>:32 [#uses=4]
+	icmp slt i32 %32, 0		; <i1>:33 [#uses=1]
+	br i1 %33, label %bb202, label %bb198
+
+bb198:		; preds = %bb172
+	%not. = icmp slt i32 %32, 1073741824		; <i1> [#uses=1]
+	zext i1 %not. to i32		; <i32>:34 [#uses=1]
+	%.351 = shl i32 %32, %34		; <i32> [#uses=1]
+	br label %bb202
+
+bb202:		; preds = %bb198, %bb172
+	%.0335 = phi i32 [ %.351, %bb198 ], [ %32, %bb172 ]		; <i32> [#uses=1]
+	icmp ne i32 %31, %.0335		; <i1>:35 [#uses=1]
+	zext i1 %35 to i32		; <i32>:36 [#uses=1]
+	tail call i32 @func_128( i32 %36 ) nounwind 		; <i32>:37 [#uses=0]
+	icmp eq i32 %p_45, 293685862		; <i1>:38 [#uses=1]
+	br i1 %38, label %bb205, label %bb311
+
+bb205:		; preds = %bb202
+	icmp sgt i32 %p_46, 214		; <i1>:39 [#uses=1]
+	zext i1 %39 to i32		; <i32>:40 [#uses=2]
+	tail call i32 @func_128( i32 %40 ) nounwind 		; <i32>:41 [#uses=0]
+	icmp sgt i32 %p_46, 65532		; <i1>:42 [#uses=1]
+	zext i1 %42 to i16		; <i16>:43 [#uses=1]
+	tail call i32 @func_74( i16 zeroext  23618, i8 zeroext  -29, i16 zeroext  %43, i16 zeroext  1 ) nounwind 		; <i32>:44 [#uses=2]
+	tail call i32 @func_103( i16 zeroext  -869 ) nounwind 		; <i32>:45 [#uses=0]
+	udiv i32 %44, 34162		; <i32>:46 [#uses=1]
+	icmp ult i32 %44, 34162		; <i1>:47 [#uses=1]
+	%.0331 = select i1 %47, i32 1, i32 %46		; <i32> [#uses=1]
+	urem i32 293685862, %.0331		; <i32>:48 [#uses=1]
+	tail call i32 @func_112( i32 %p_52, i16 zeroext  1 ) nounwind 		; <i32>:49 [#uses=0]
+	icmp eq i32 %p_52, 0		; <i1>:50 [#uses=2]
+	br i1 %50, label %bb222, label %bb215
+
+bb215:		; preds = %bb205
+	zext i16 %p_48 to i32		; <i32>:51 [#uses=1]
+	icmp eq i16 %p_48, 0		; <i1>:52 [#uses=1]
+	%.0329 = select i1 %52, i32 1, i32 %51		; <i32> [#uses=1]
+	udiv i32 -1, %.0329		; <i32>:53 [#uses=1]
+	icmp eq i32 %53, 0		; <i1>:54 [#uses=1]
+	br i1 %54, label %bb222, label %bb223
+
+bb222:		; preds = %bb215, %bb205
+	br label %bb223
+
+bb223:		; preds = %bb222, %bb215
+	%iftmp.437.0 = phi i32 [ 0, %bb222 ], [ 1, %bb215 ]		; <i32> [#uses=1]
+	load i32, i32* @g_91, align 4		; <i32>:55 [#uses=3]
+	tail call i32 @func_103( i16 zeroext  4 ) nounwind 		; <i32>:56 [#uses=0]
+	tail call i32 @func_112( i32 0, i16 zeroext  -31374 ) nounwind 		; <i32>:57 [#uses=0]
+	load i32, i32* @g_197, align 4		; <i32>:58 [#uses=1]
+	tail call i32 @func_124( i32 28156 ) nounwind 		; <i32>:59 [#uses=1]
+	load i32, i32* @g_260, align 4		; <i32>:60 [#uses=1]
+	load i32, i32* @g_43, align 4		; <i32>:61 [#uses=1]
+	xor i32 %61, %60		; <i32>:62 [#uses=1]
+	mul i32 %62, %59		; <i32>:63 [#uses=1]
+	trunc i32 %63 to i8		; <i8>:64 [#uses=1]
+	trunc i32 %58 to i16		; <i16>:65 [#uses=1]
+	tail call i32 @func_74( i16 zeroext  0, i8 zeroext  %64, i16 zeroext  %65, i16 zeroext  0 ) nounwind 		; <i32>:66 [#uses=2]
+	icmp slt i32 %66, 0		; <i1>:67 [#uses=1]
+	icmp slt i32 %55, 0		; <i1>:68 [#uses=1]
+	icmp ugt i32 %55, 31		; <i1>:69 [#uses=1]
+	or i1 %68, %69		; <i1>:70 [#uses=1]
+	%or.cond352 = or i1 %70, %67		; <i1> [#uses=1]
+	select i1 %or.cond352, i32 0, i32 %55		; <i32>:71 [#uses=1]
+	%.353 = ashr i32 %66, %71		; <i32> [#uses=2]
+	load i16, i16* @g_221, align 2		; <i16>:72 [#uses=1]
+	zext i16 %72 to i32		; <i32>:73 [#uses=1]
+	icmp ugt i32 %.353, 31		; <i1>:74 [#uses=1]
+	select i1 %74, i32 0, i32 %.353		; <i32>:75 [#uses=1]
+	%.0323 = lshr i32 %73, %75		; <i32> [#uses=1]
+	add i32 %.0323, %iftmp.437.0		; <i32>:76 [#uses=1]
+	and i32 %48, 255		; <i32>:77 [#uses=2]
+	add i32 %77, 2042556439		; <i32>:78 [#uses=1]
+	load i32, i32* @g_207, align 4		; <i32>:79 [#uses=2]
+	icmp ugt i32 %79, 31		; <i1>:80 [#uses=1]
+	select i1 %80, i32 0, i32 %79		; <i32>:81 [#uses=1]
+	%.0320 = lshr i32 %77, %81		; <i32> [#uses=1]
+	icmp ne i32 %78, %.0320		; <i1>:82 [#uses=1]
+	zext i1 %82 to i8		; <i8>:83 [#uses=1]
+	tail call i32 @func_25( i8 zeroext  %83 ) nounwind 		; <i32>:84 [#uses=1]
+	xor i32 %84, 1		; <i32>:85 [#uses=1]
+	load i32, i32* @g_197, align 4		; <i32>:86 [#uses=1]
+	add i32 %86, 1		; <i32>:87 [#uses=1]
+	add i32 %87, %85		; <i32>:88 [#uses=1]
+	icmp ugt i32 %76, %88		; <i1>:89 [#uses=1]
+	br i1 %89, label %bb241, label %bb311
+
+bb241:		; preds = %bb223
+	store i16 -9, i16* @g_221, align 2
+	udiv i32 %p_52, 1538244727		; <i32>:90 [#uses=1]
+	load i32, i32* @g_207, align 4		; <i32>:91 [#uses=1]
+	sub i32 %91, %90		; <i32>:92 [#uses=1]
+	load i32, i32* @g_14, align 4		; <i32>:93 [#uses=1]
+	trunc i32 %93 to i16		; <i16>:94 [#uses=1]
+	trunc i32 %p_46 to i16		; <i16>:95 [#uses=2]
+	sub i16 %94, %95		; <i16>:96 [#uses=1]
+	load i32, i32* @g_197, align 4		; <i32>:97 [#uses=1]
+	trunc i32 %97 to i16		; <i16>:98 [#uses=1]
+	tail call i32 @func_55( i32 -346178830, i16 zeroext  %98, i16 zeroext  %95 ) nounwind 		; <i32>:99 [#uses=0]
+	zext i16 %p_48 to i32		; <i32>:100 [#uses=1]
+	load i8, i8* @g_247, align 1		; <i8>:101 [#uses=1]
+	zext i8 %101 to i32		; <i32>:102 [#uses=1]
+	sub i32 %100, %102		; <i32>:103 [#uses=1]
+	tail call i32 @func_55( i32 %103, i16 zeroext  -2972, i16 zeroext  %96 ) nounwind 		; <i32>:104 [#uses=0]
+	xor i32 %92, 2968		; <i32>:105 [#uses=1]
+	load i32, i32* @g_197, align 4		; <i32>:106 [#uses=1]
+	icmp ugt i32 %105, %106		; <i1>:107 [#uses=1]
+	zext i1 %107 to i32		; <i32>:108 [#uses=1]
+	store i32 %108, i32* @g_33, align 4
+	br label %bb248
+
+bb248:		; preds = %bb284, %bb241
+	%p_49_addr.1.reg2mem.0 = phi i32 [ 0, %bb241 ], [ %134, %bb284 ]		; <i32> [#uses=2]
+	%p_48_addr.2.reg2mem.0 = phi i16 [ %p_48, %bb241 ], [ %p_48_addr.1, %bb284 ]		; <i16> [#uses=1]
+	%p_46_addr.1.reg2mem.0 = phi i32 [ %p_46, %bb241 ], [ %133, %bb284 ]		; <i32> [#uses=1]
+	%p_45_addr.1.reg2mem.0 = phi i32 [ %p_45, %bb241 ], [ %p_45_addr.0, %bb284 ]		; <i32> [#uses=2]
+	tail call i32 @func_63( i16 signext  1 ) nounwind 		; <i32>:109 [#uses=1]
+	icmp eq i32 %109, 0		; <i1>:110 [#uses=1]
+	br i1 %110, label %bb272.thread, label %bb255.thread
+
+bb272.thread:		; preds = %bb248
+	store i32 1, i32* @g_82
+	load i16, i16* @g_267, align 2		; <i16>:111 [#uses=1]
+	icmp eq i16 %111, 0		; <i1>:112 [#uses=1]
+	br i1 %112, label %bb311.loopexit.split, label %bb268
+
+bb255.thread:		; preds = %bb248
+	load i32, i32* @g_260, align 4		; <i32>:113 [#uses=1]
+	sub i32 %113, %p_52		; <i32>:114 [#uses=1]
+	and i32 %114, -20753		; <i32>:115 [#uses=1]
+	icmp ne i32 %115, 0		; <i1>:116 [#uses=1]
+	zext i1 %116 to i16		; <i16>:117 [#uses=1]
+	store i16 %117, i16* @g_221, align 2
+	br label %bb284
+
+bb268:		; preds = %bb268, %bb272.thread
+	%indvar = phi i32 [ 0, %bb272.thread ], [ %g_82.tmp.0, %bb268 ]		; <i32> [#uses=2]
+	%p_46_addr.0.reg2mem.0 = phi i32 [ %p_46_addr.1.reg2mem.0, %bb272.thread ], [ %119, %bb268 ]		; <i32> [#uses=1]
+	%g_82.tmp.0 = add i32 %indvar, 1		; <i32> [#uses=2]
+	trunc i32 %p_46_addr.0.reg2mem.0 to i16		; <i16>:118 [#uses=1]
+	and i32 %g_82.tmp.0, 28156		; <i32>:119 [#uses=1]
+	add i32 %indvar, 2		; <i32>:120 [#uses=4]
+	icmp sgt i32 %120, -1		; <i1>:121 [#uses=1]
+	br i1 %121, label %bb268, label %bb274.split		; inner loop back edge: repeat while %indvar + 2 > -1
+
+bb274.split:		; preds = %bb268
+	store i32 %120, i32* @g_82
+	br i1 %50, label %bb279, label %bb276		; %50 (%p_52 == 0) was computed in %bb205, before either loop
+
+bb276:		; preds = %bb274.split
+	store i16 0, i16* @g_222, align 2
+	br label %bb284
+
+bb279:		; preds = %bb274.split
+	icmp eq i32 %120, 0		; <i1>:122 [#uses=1]
+	%.0317 = select i1 %122, i32 1, i32 %120		; <i32> [#uses=1]
+	udiv i32 -8, %.0317		; <i32>:123 [#uses=1]
+	trunc i32 %123 to i16		; <i16>:124 [#uses=1]
+	br label %bb284
+
+bb284:		; preds = %bb279, %bb276, %bb255.thread
+	%p_49_addr.0 = phi i32 [ %p_49_addr.1.reg2mem.0, %bb279 ], [ %p_49_addr.1.reg2mem.0, %bb276 ], [ 0, %bb255.thread ]		; <i32> [#uses=1]
+	%p_48_addr.1 = phi i16 [ %124, %bb279 ], [ %118, %bb276 ], [ %p_48_addr.2.reg2mem.0, %bb255.thread ]		; <i16> [#uses=1]
+	%p_45_addr.0 = phi i32 [ %p_45_addr.1.reg2mem.0, %bb279 ], [ %p_45_addr.1.reg2mem.0, %bb276 ], [ 8, %bb255.thread ]		; <i32> [#uses=3]
+	load i32, i32* @g_43, align 4		; <i32>:125 [#uses=1]
+	trunc i32 %125 to i8		; <i8>:126 [#uses=1]
+	tail call i32 @func_116( i8 zeroext  %126 ) nounwind 		; <i32>:127 [#uses=0]
+	lshr i32 65255, %p_45_addr.0		; <i32>:128 [#uses=1]
+	icmp ugt i32 %p_45_addr.0, 31		; <i1>:129 [#uses=1]
+	%.op = lshr i32 %128, 31		; <i32> [#uses=1]
+	%.op.op = xor i32 %.op, 1		; <i32> [#uses=1]
+	%.354..lobit.not = select i1 %129, i32 1, i32 %.op.op		; <i32> [#uses=1]
+	load i16, i16* @g_39, align 2		; <i16>:130 [#uses=1]
+	zext i16 %130 to i32		; <i32>:131 [#uses=1]
+	icmp slt i32 %.354..lobit.not, %131		; <i1>:132 [#uses=1]
+	zext i1 %132 to i32		; <i32>:133 [#uses=1]
+	add i32 %p_49_addr.0, 1		; <i32>:134 [#uses=2]
+	icmp sgt i32 %134, -1		; <i1>:135 [#uses=1]
+	br i1 %135, label %bb248, label %bb307		; outer loop back edge: repeat while %134 > -1
+
+bb307:		; preds = %bb284
+	tail call i32 @func_103( i16 zeroext  0 ) nounwind 		; <i32>:136 [#uses=0]
+	ret i32 %40
+
+bb311.loopexit.split:		; preds = %bb272.thread
+	store i32 1, i32* @g_82
+	ret i32 1
+
+bb311:		; preds = %bb223, %bb202
+	%.0 = phi i32 [ 1, %bb202 ], [ 0, %bb223 ]		; <i32> [#uses=1]
+	ret i32 %.0
+}
+
+declare i32 @func_25(i8 zeroext ) nounwind  ; external helpers called from @func_44; only their signatures are part of this test
+
+declare i32 @func_55(i32, i16 zeroext , i16 zeroext ) nounwind 
+
+declare i32 @func_63(i16 signext ) nounwind  ; the only helper here that takes a signext argument
+
+declare i32 @func_74(i16 zeroext , i8 zeroext , i16 zeroext , i16 zeroext ) nounwind 
+
+declare i32 @func_84(i32, i16 zeroext , i16 zeroext ) nounwind 
+
+declare i32 @func_103(i16 zeroext ) nounwind 
+
+declare i32 @func_124(i32) nounwind 
+
+declare i32 @func_128(i32) nounwind 
+
+declare i32 @func_116(i8 zeroext ) nounwind 
+
+declare i32 @func_112(i32, i16 zeroext ) nounwind 

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr27056.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr27056.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr27056.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr27056.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,51 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+%struct.L = type { i8, i8* }
+
+declare i32 @__CxxFrameHandler3(...)
+
+@GV1 = external global %struct.L*
+@GV2 = external global %struct.L
+
+define void @b_copy_ctor() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  %0 = load %struct.L*, %struct.L** @GV1, align 8
+  br label %for.cond
+
+for.cond:                                         ; preds = %call.i.noexc, %entry
+  %d.0 = phi %struct.L* [ %0, %entry ], [ %incdec.ptr, %call.i.noexc ]
+  invoke void @a_copy_ctor()
+          to label %call.i.noexc unwind label %catch.dispatch
+
+call.i.noexc:                                     ; preds = %for.cond
+  %incdec.ptr = getelementptr inbounds %struct.L, %struct.L* %d.0, i64 1
+  br label %for.cond
+
+catch.dispatch:                                   ; preds = %for.cond
+  %1 = catchswitch within none [label %catch] unwind to caller
+
+catch:                                            ; preds = %catch.dispatch
+  %2 = catchpad within %1 [i8* null, i32 64, i8* null]
+  %cmp16 = icmp eq %struct.L* %0, %d.0
+  br i1 %cmp16, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %catch
+  %cmp = icmp eq %struct.L* @GV2, %d.0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %catch
+  catchret from %2 to label %try.cont
+
+try.cont:                                         ; preds = %for.end
+  ret void
+}
+
+; CHECK-LABEL: define void @b_copy_ctor(
+; CHECK:       catchpad
+; CHECK-NEXT:  icmp eq %struct.L
+; CHECK-NEXT:  %4 = sub i64 0, %1
+; CHECK-NEXT:  getelementptr {{.*}} getelementptr inbounds (%struct.L, %struct.L* @GV2, i32 0, i32 0), i64 %4
+
+declare void @a_copy_ctor()

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr3086.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr3086.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr3086.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr3086.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -loop-reduce
+; RUN: opt < %s -analyze -scalar-evolution
+; PR 3086
+
+	%struct.Cls = type { i32, i8, [2 x %struct.Cls*], [2 x %struct.Lit*] }
+	%struct.Lit = type { i8 }
+
+define fastcc i64 @collect_clauses() nounwind {
+entry:
+	br label %bb11
+
+bb5:		; preds = %bb9
+	%0 = load %struct.Lit*, %struct.Lit** %storemerge, align 8		; <%struct.Lit*> [#uses=0]
+	%indvar.next8 = add i64 %storemerge.rec, 1		; <i64> [#uses=1]
+	br label %bb9
+
+bb9:		; preds = %bb22, %bb5
+	%storemerge.rec = phi i64 [ %indvar.next8, %bb5 ], [ 0, %bb22 ]		; <i64> [#uses=2]
+	%storemerge = getelementptr %struct.Lit*, %struct.Lit** null, i64 %storemerge.rec		; <%struct.Lit**> [#uses=2]
+	%1 = icmp ugt %struct.Lit** null, %storemerge		; <i1> [#uses=1]
+	br i1 %1, label %bb5, label %bb22
+
+bb11:		; preds = %bb22, %entry
+	%2 = load %struct.Cls*, %struct.Cls** null, align 8		; <%struct.Cls*> [#uses=0]
+	br label %bb22
+
+bb22:		; preds = %bb11, %bb9
+	br i1 false, label %bb11, label %bb9
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr31627.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr31627.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr31627.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr31627.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,58 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+define void @fn3() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  %call = invoke i32 @fn2()
+          to label %for.cond.preheader unwind label %catch.dispatch2
+
+for.cond.preheader:                               ; preds = %entry
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.preheader, %for.cond
+  %b.0 = phi i32 [ %inc, %for.cond ], [ %call, %for.cond.preheader ]
+  %inc = add nsw i32 %b.0, 1
+  invoke void @fn1(i32 %inc)
+          to label %for.cond unwind label %catch.dispatch
+
+; CHECK:   %[[add:.*]] = add i32 %call, 1
+; CHECK:   br label %for.cond
+
+; CHECK: for.cond:                                         ; preds = %for.cond, %for.cond.preheader
+; CHECK:   %[[lsr_iv:.*]] = phi i32 [ %lsr.iv.next, %for.cond ], [ %[[add]], %for.cond.preheader ]
+; CHECK:   %[[lsr_iv_next:.*]] = add i32 %lsr.iv, 1
+; CHECK:   invoke void @fn1(i32 %[[lsr_iv]])
+
+
+catch.dispatch:                                   ; preds = %for.cond
+  %0 = catchswitch within none [label %catch] unwind label %catch.dispatch2
+
+catch:                                            ; preds = %catch.dispatch
+  %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+  invoke void @_CxxThrowException(i8* null, i8* null) #2 [ "funclet"(token %1) ]
+          to label %unreachable unwind label %catch.dispatch2
+
+catch.dispatch2:                                  ; preds = %catch.dispatch, %catch, %entry
+  %a.0 = phi i32 [ undef, %entry ], [ %call, %catch ], [ %call, %catch.dispatch ]
+  %2 = catchswitch within none [label %catch3] unwind to caller
+
+catch3:                                           ; preds = %catch.dispatch2
+  %3 = catchpad within %2 [i8* null, i32 64, i8* null]
+  call void @fn1(i32 %a.0) [ "funclet"(token %3) ]
+  catchret from %3 to label %try.cont4
+
+try.cont4:                                        ; preds = %catch3
+  ret void
+
+unreachable:                                      ; preds = %catch
+  unreachable
+}
+
+declare i32 @fn2()
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @fn1(i32)
+
+declare void @_CxxThrowException(i8*, i8*)

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr3399.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr3399.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr3399.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr3399.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt < %s -loop-reduce | llvm-dis
+; PR3399
+
+@g_53 = external global i32		; <i32*> [#uses=1]
+
+define i32 @foo() nounwind {
+bb5.thread:
+	br label %bb
+
+bb:		; preds = %bb5, %bb5.thread
+	%indvar = phi i32 [ 0, %bb5.thread ], [ %indvar.next, %bb5 ]		; <i32> [#uses=2]
+	br i1 false, label %bb5, label %bb1
+
+bb1:		; preds = %bb
+	%l_2.0.reg2mem.0 = sub i32 0, %indvar		; <i32> [#uses=1]
+	%0 = load volatile i32, i32* @g_53, align 4		; <i32> [#uses=1]
+	%1 = trunc i32 %l_2.0.reg2mem.0 to i16		; <i16> [#uses=1]
+	%2 = trunc i32 %0 to i16		; <i16> [#uses=1]
+	%3 = mul i16 %2, %1		; <i16> [#uses=1]
+	%4 = icmp eq i16 %3, 0		; <i1> [#uses=1]
+	br i1 %4, label %bb7, label %bb2
+
+bb2:		; preds = %bb2, %bb1
+	br label %bb2
+
+bb5:		; preds = %bb
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br label %bb
+
+bb7:		; preds = %bb1
+	ret i32 1
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/pr3571.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/pr3571.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/pr3571.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/pr3571.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-reduce | llvm-dis
+; PR3571
+
+target triple = "i386-pc-mingw32"
+define void @_ZNK18qdesigner_internal10TreeWidget12drawBranchesEP8QPainterRK5QRectRK11QModelIndex() nounwind {
+entry:
+	br label %_ZNK11QModelIndex7isValidEv.exit.i
+
+bb.i:		; preds = %_ZNK11QModelIndex7isValidEv.exit.i
+	%indvar.next = add i32 %result.0.i, 1		; <i32> [#uses=1]
+	br label %_ZNK11QModelIndex7isValidEv.exit.i
+
+_ZNK11QModelIndex7isValidEv.exit.i:		; preds = %bb.i, %entry
+	%result.0.i = phi i32 [ 0, %entry ], [ %indvar.next, %bb.i ]		; <i32> [#uses=2]
+	%0 = load i32*, i32** null, align 4		; <%struct.QAbstractItemDelegate*> [#uses=0]
+	br i1 false, label %_ZN18qdesigner_internalL5levelEP18QAbstractItemModelRK11QModelIndex.exit, label %bb.i
+
+_ZN18qdesigner_internalL5levelEP18QAbstractItemModelRK11QModelIndex.exit:		; preds = %_ZNK11QModelIndex7isValidEv.exit.i
+	%1 = call i32 @_ZNK9QTreeView11indentationEv(i32* null) nounwind		; <i32> [#uses=1]
+	%2 = mul i32 %1, %result.0.i		; <i32> [#uses=1]
+	%3 = add i32 %2, -2		; <i32> [#uses=1]
+	%4 = add i32 %3, 0		; <i32> [#uses=1]
+	store i32 %4, i32* null, align 8
+	unreachable
+}
+
+declare i32 @_ZNK9QTreeView11indentationEv(i32*)

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; CHECK-NOT: {{inttoptr|ptrtoint}}
+; CHECK: scevgep
+; CHECK-NOT: {{inttoptr|ptrtoint}}
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n32:64"
+
+; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.
+
+define void @foo(i8* %p) nounwind {
+entry:
+  br i1 true, label %bb.nph, label %for.end
+
+for.cond:
+  %phitmp = icmp slt i64 %inc, 20
+  br i1 %phitmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+  br label %for.end
+
+bb.nph:
+  br label %for.body
+
+for.body:
+  %storemerge1 = phi i64 [ %inc, %for.cond ], [ 0, %bb.nph ]
+  %call = tail call i64 @bar() nounwind
+  %call2 = tail call i64 @car() nounwind
+  %conv = trunc i64 %call2 to i8
+  %conv3 = sext i8 %conv to i64
+  %add = add nsw i64 %call, %storemerge1
+  %add4 = add nsw i64 %add, %conv3
+  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %add4
+  store i8 0, i8* %arrayidx
+  %inc = add nsw i64 %storemerge1, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+declare i64 @bar()
+
+declare i64 @car()

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/related_indvars.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/related_indvars.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/related_indvars.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/related_indvars.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-reduce -S | grep phi | count 1
+
+; This should only result in one PHI node!
+
+; void foo(double *D, double *E, double F) {
+;   while (D != E)
+;     *D++ = F;
+; }
+
+define void @foo(double* %D, double* %E, double %F) nounwind {
+entry:
+	%tmp.24 = icmp eq double* %D, %E		; <i1> [#uses=1]
+	br i1 %tmp.24, label %return, label %no_exit
+no_exit:		; preds = %no_exit, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ]		; <i32> [#uses=2]
+	%D_addr.0.0.rec = bitcast i32 %indvar to i32		; <i32> [#uses=2]
+	%D_addr.0.0 = getelementptr double, double* %D, i32 %D_addr.0.0.rec		; <double*> [#uses=1]
+	%inc.rec = add i32 %D_addr.0.0.rec, 1		; <i32> [#uses=1]
+	%inc = getelementptr double, double* %D, i32 %inc.rec		; <double*> [#uses=1]
+	store double %F, double* %D_addr.0.0
+	%tmp.2 = icmp eq double* %inc, %E		; <i1> [#uses=1]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %tmp.2, label %return, label %no_exit
+return:		; preds = %no_exit, %entry
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/remove_indvar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/remove_indvar.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/remove_indvar.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/remove_indvar.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; Check that this test makes INDVAR and related stuff dead.
+; RUN: opt < %s -loop-reduce -S | not grep INDVAR
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+declare i1 @pred()
+
+define void @test(i32* %P) {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+        %i = phi i32 [ 0, %0 ], [ %i.next, %Loop ]
+	%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ]		; <i32> [#uses=2]
+	%STRRED = getelementptr i32, i32* %P, i32 %INDVAR		; <i32*> [#uses=1]
+	store i32 0, i32* %STRRED
+	%INDVAR2 = add i32 %INDVAR, 1		; <i32> [#uses=1]
+        %i.next = add i32 %i, 1
+	%cond = call i1 @pred( )		; <i1> [#uses=1]
+	br i1 %cond, label %Loop, label %Out
+Out:		; preds = %Loop
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/scaling_factor_cost_crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,68 @@
+; RUN: opt -loop-reduce %s -S -o - | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+; <rdar://problem/14199725> Assertion failed: (CurScaleCost >= 0 && "Legal addressing mode has an illegal cost!")
+; CHECK-LABEL: @scalingFactorCrash(
+define void @scalingFactorCrash() {
+  br i1 undef, label %1, label %24
+
+; <label>:1                                       ; preds = %0
+  br i1 undef, label %2, label %24
+
+; <label>:2                                       ; preds = %1
+  br i1 undef, label %3, label %24
+
+; <label>:3                                       ; preds = %2
+  br i1 undef, label %4, label %24
+
+; <label>:4                                       ; preds = %3
+  br i1 undef, label %24, label %6
+
+; <label>:5                                       ; preds = %6
+  br i1 undef, label %24, label %7
+
+; <label>:6                                       ; preds = %6, %4
+  br i1 undef, label %6, label %5
+
+; <label>:7                                       ; preds = %9, %5
+  br label %8
+
+; <label>:8                                       ; preds = %8, %7
+  br i1 undef, label %9, label %8
+
+; <label>:9                                       ; preds = %8
+  br i1 undef, label %7, label %10
+
+; <label>:10                                      ; preds = %9
+  br i1 undef, label %24, label %11
+
+; <label>:11                                      ; preds = %10
+  br i1 undef, label %15, label %13
+
+; <label>:12                                      ; preds = %14
+  br label %15
+
+; <label>:13                                      ; preds = %11
+  br label %14
+
+; <label>:14                                      ; preds = %14, %13
+  br i1 undef, label %14, label %12
+
+; <label>:15                                      ; preds = %12, %11
+  br i1 undef, label %16, label %24
+
+; <label>:16                                      ; preds = %16, %15
+  %17 = phi i32 [ %21, %16 ], [ undef, %15 ]
+  %18 = sub i32 %17, 1623127498
+  %19 = getelementptr inbounds i32, i32* undef, i32 %18
+  store i32 undef, i32* %19, align 4
+  %20 = add i32 %17, 1623127499
+  %21 = add i32 %20, -1623127498
+  %22 = add i32 %21, -542963121
+  %23 = icmp ult i32 %22, undef
+  br i1 undef, label %16, label %24
+
+; <label>:24                                      ; preds = %16, %15, %10, %5, %4, %3, %2, %1, %0
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/scev-after-loopinstsimplify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/scev-after-loopinstsimplify.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/scev-after-loopinstsimplify.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/scev-after-loopinstsimplify.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,43 @@
+; RUN: opt %s -indvars -loop-instsimplify -loop-reduce
+; We are only checking that there is no crash!
+
+; https://bugs.llvm.org/show_bug.cgi?id=37936
+
+; The problem is as follows:
+; 1. indvars marks %dec as NUW.
+; 2. loop-instsimplify runs instsimplify, which constant-folds %dec to -1
+; 3. loop-reduce tries to do some further modification, but crashes
+;    with a type assertion in cast, because %dec is no longer an Instruction,
+;    even though the SCEV data indicated it was.
+
+; If the runline is split into two, i.e. -indvars -loop-instsimplify first, that
+; stored into a file, and then -loop-reduce is run on that, there is no crash.
+; So it looks like the problem is due to -loop-instsimplify not discarding SCEV.
+
+target datalayout = "n16"
+
+@a = external global i16, align 1
+
+define void @f1() {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %land.end, %entry
+  %c.0 = phi i16 [ 0, %entry ], [ %dec, %land.end ]
+  br i1 undef, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret void
+
+for.body:                                         ; preds = %for.cond
+  %0 = load i16, i16* @a, align 1
+  %cmp = icmp sgt i16 %0, %c.0
+  br i1 %cmp, label %land.rhs, label %land.end
+
+land.rhs:                                         ; preds = %for.body
+  unreachable
+
+land.end:                                         ; preds = %for.body
+  %dec = add nsw i16 %c.0, -1
+  br label %for.cond
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt < %s -loop-reduce -S
+
+; Test that SCEV insert points don't get corrupted and cause an
+; invalid instruction to be inserted in a block other than its parent.
+; See http://reviews.llvm.org/D20703 for context.
+define void @test() {
+entry:
+  %bf.load = load i32, i32* null, align 4
+  %bf.clear = lshr i32 %bf.load, 1
+  %div = and i32 %bf.clear, 134217727
+  %sub = add nsw i32 %div, -1
+  %0 = zext i32 %sub to i64
+  br label %while.cond
+
+while.cond:                                       ; preds = %cond.end, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %cond.end ], [ 0, %entry ]
+  %cmp = icmp eq i64 %indvars.iv, %0
+  br i1 %cmp, label %cleanup16, label %while.body
+
+while.body:                                       ; preds = %while.cond
+  %1 = trunc i64 %indvars.iv to i32
+  %mul = shl i32 %1, 1
+  %add = add nuw i32 %mul, 2
+  %cmp3 = icmp ult i32 %add, 0
+  br i1 %cmp3, label %if.end, label %if.then
+
+if.then:                                          ; preds = %while.body
+  unreachable
+
+if.end:                                           ; preds = %while.body
+  br i1 false, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %if.end
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %if.end
+  %add7 = add i32 %1, 1
+  %cmp12 = icmp ugt i32 %add7, %sub
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br i1 %cmp12, label %if.then13, label %while.cond
+
+if.then13:                                        ; preds = %cond.end
+  unreachable
+
+cleanup16:                                        ; preds = %while.cond
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/sext-ind-var.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/sext-ind-var.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/sext-ind-var.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/sext-ind-var.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,139 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; LSR used not to be able to generate a float* induction variable in
+; these cases due to scalar evolution not propagating nsw from an
+; instruction to the SCEV, preventing distributing sext into the
+; corresponding addrec.
+
+; Test this pattern:
+;
+;   for (int i = 0; i < numIterations; ++i)
+;     sum += ptr[i + offset];
+;
+define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testadd
+; CHECK: sext i32 %offset to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+  %index32 = add nuw nsw i32 %i, %offset
+  %index64 = sext i32 %index32 to i64
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %addend = load float, float* %ptr, align 4
+  %nextsum = fadd float %sum, %addend
+  %nexti = add nuw nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret float %nextsum
+}
+
+; Test this pattern:
+;
+;   for (int i = 0; i < numIterations; ++i)
+;     sum += ptr[i - offset];
+;
+define float @testsub(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testsub
+; CHECK: sext i32 %offset to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+  %index32 = sub nuw nsw i32 %i, %offset
+  %index64 = sext i32 %index32 to i64
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %addend = load float, float* %ptr, align 4
+  %nextsum = fadd float %sum, %addend
+  %nexti = add nuw nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret float %nextsum
+}
+
+; Test this pattern:
+;
+;   for (int i = 0; i < numIterations; ++i)
+;     sum += ptr[i * stride];
+;
+define float @testmul(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @testmul
+; CHECK: sext i32 %stride to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+  %index32 = mul nuw nsw i32 %i, %stride
+  %index64 = sext i32 %index32 to i64
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %addend = load float, float* %ptr, align 4
+  %nextsum = fadd float %sum, %addend
+  %nexti = add nuw nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret float %nextsum
+}
+
+; Test this pattern:
+;
+;   for (int i = 0; i < numIterations; ++i)
+;     sum += ptr[3 * (i << 7)];
+;
+; The multiplication by 3 is to make the address calculation expensive
+; enough to force the introduction of a pointer induction variable.
+define float @testshl(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @testshl
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+  %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+  %index32 = shl nuw nsw i32 %i, 7
+  %index32mul = mul nuw nsw i32 %index32, 3
+  %index64 = sext i32 %index32mul to i64
+  %ptr = getelementptr inbounds float, float* %input, i64 %index64
+  %addend = load float, float* %ptr, align 4
+  %nextsum = fadd float %sum, %addend
+  %nexti = add nuw nsw i32 %i, 1
+  %exitcond = icmp eq i32 %nexti, %numIterations
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret float %nextsum
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-reduce -S | grep mul | count 1
+; LSR should not make two copies of the Q*L expression in the preheader!
+
+define i8 @test(i8* %A, i8* %B, i32 %L, i32 %Q, i32 %N.s) {
+entry:
+	%tmp.6 = mul i32 %Q, %L		; <i32> [#uses=1]
+	%N = bitcast i32 %N.s to i32		; <i32> [#uses=1]
+	br label %no_exit
+no_exit:		; preds = %no_exit, %entry
+	%indvar.ui = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ]		; <i32> [#uses=2]
+	%Sum.0.0 = phi i8 [ 0, %entry ], [ %tmp.21, %no_exit ]		; <i8> [#uses=1]
+	%indvar = bitcast i32 %indvar.ui to i32		; <i32> [#uses=1]
+	%N_addr.0.0 = sub i32 %N.s, %indvar		; <i32> [#uses=1]
+	%tmp.8 = add i32 %N_addr.0.0, %tmp.6		; <i32> [#uses=2]
+	%tmp.9 = getelementptr i8, i8* %A, i32 %tmp.8		; <i8*> [#uses=1]
+	%tmp.10 = load i8, i8* %tmp.9		; <i8> [#uses=1]
+	%tmp.17 = getelementptr i8, i8* %B, i32 %tmp.8		; <i8*> [#uses=1]
+	%tmp.18 = load i8, i8* %tmp.17		; <i8> [#uses=1]
+	%tmp.19 = sub i8 %tmp.10, %tmp.18		; <i8> [#uses=1]
+	%tmp.21 = add i8 %tmp.19, %Sum.0.0		; <i8> [#uses=2]
+	%indvar.next = add i32 %indvar.ui, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %loopexit, label %no_exit
+loopexit:		; preds = %no_exit
+	ret i8 %tmp.21
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/share_ivs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/share_ivs.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/share_ivs.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/share_ivs.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt < %s -loop-reduce -S | grep phi | count 1
+
+; This testcase should have ONE stride 18 indvar, the other use should have a
+; loop invariant value (B) added to it inside of the loop, instead of having
+; a whole indvar based on B for it.
+
+declare i1 @cond(i32)
+
+define void @test(i32 %B) {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%IV = phi i32 [ 0, %0 ], [ %IVn, %Loop ]		; <i32> [#uses=3]
+	%C = mul i32 %IV, 18		; <i32> [#uses=1]
+	%D = mul i32 %IV, 18		; <i32> [#uses=1]
+	%E = add i32 %D, %B		; <i32> [#uses=1]
+	%cnd = call i1 @cond( i32 %E )		; <i1> [#uses=1]
+	call i1 @cond( i32 %C )		; <i1>:1 [#uses=0]
+	%IVn = add i32 %IV, 1		; <i32> [#uses=1]
+	br i1 %cnd, label %Loop, label %Out
+Out:		; preds = %Loop
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/shl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/shl.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/shl.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/shl.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce -gvn -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+define void @_Z3fooPfll(float* nocapture readonly %input, i64 %n, i64 %s) {
+; CHECK-LABEL: @_Z3fooPfll(
+entry:
+  %mul = shl nsw i64 %s, 2
+; CHECK: %mul = shl i64 %s, 2
+  tail call void @_Z3bazl(i64 %mul) #2
+; CHECK-NEXT: call void @_Z3bazl(i64 %mul)
+  %cmp.5 = icmp sgt i64 %n, 0
+  br i1 %cmp.5, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.06 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds float, float* %input, i64 %i.06
+; LoopStrengthReduce should reuse %mul as the stride.
+; CHECK: getelementptr i1, i1* {{[^,]+}}, i64 %mul
+  %0 = load float, float* %arrayidx, align 4
+  tail call void @_Z3barf(float %0) #2
+  %add = add nsw i64 %i.06, %s
+  %cmp = icmp slt i64 %add, %n
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+declare void @_Z3bazl(i64)
+
+declare void @_Z3barf(float)

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/two-combinations-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/two-combinations-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/two-combinations-bug.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/two-combinations-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt < %s -loop-reduce -lsr-recursive-setupcost=0 -S | FileCheck %s
+
+; This test is adapted from the n-body test of the LLVM test-suite: A bug in
+; r345114 caused LSR to generate incorrect code. The test verifies that the
+; induction variable generated for the inner loop depends on the induction
+; variable of the outer loop.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.planet.0.3.6.11.12.15.16.17.24.25.26.33.44 = type { double, double, double, double, double, double, double }
+
+; Function Attrs: nounwind uwtable
+define dso_local void @advance(i32 %nbodies, %struct.planet.0.3.6.11.12.15.16.17.24.25.26.33.44* nocapture %bodies) local_unnamed_addr #0 {
+; CHECK-LABEL: @advance(
+; CHECK:  for.cond.loopexit:
+; CHECK:    [[LSR_IV_NEXT:%.*]] = add i64 [[LSR_IV:%.*]], -1
+; CHECK:    br label %for.body
+; CHECK:  for.body:
+; CHECK:    [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT]]
+; CHECK:    br label %for.body3
+; CHECK:  for.body3:
+; CHECK:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %for.body3 ], [ [[LSR_IV]], %for.body ]
+; CHECK:    [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], -1
+; CHECK:    [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT2]], 0
+; CHECK:    br i1 [[EXITCOND]], label %for.cond.loopexit, label %for.body3
+;
+entry:
+  %wide.trip.count = zext i32 %nbodies to i64
+  br label %for.body
+
+for.cond.loopexit:                                ; preds = %for.body3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond.loopexit, %entry
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.cond.loopexit ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body
+  %indvars.iv98 = phi i64 [ %indvars.iv, %for.body ], [ %indvars.iv.next99, %for.body3 ]
+  %z9 = getelementptr inbounds %struct.planet.0.3.6.11.12.15.16.17.24.25.26.33.44, %struct.planet.0.3.6.11.12.15.16.17.24.25.26.33.44* %bodies, i64 %indvars.iv98, i32 2
+  %tmp = load double, double* %z9, align 8, !tbaa !0
+  %indvars.iv.next99 = add nuw nsw i64 %indvars.iv98, 1
+  %exitcond = icmp eq i64 %indvars.iv.next99, %wide.trip.count
+  br i1 %exitcond, label %for.cond.loopexit, label %for.body3
+}
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = !{!1, !2, i64 16}
+!1 = !{!"planet", !2, i64 0, !2, i64 8, !2, i64 16, !2, i64 24, !2, i64 32, !2, i64 40, !2, i64 48}
+!2 = !{!"double", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
+; should be able to form pretty GEPs.
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Copy of uglygep with a different address space
+; This tests expandAddToGEP uses the right smaller integer type for
+; another address space
+define void @Z4() nounwind {
+; CHECK: define void @Z4
+bb:
+  br label %bb3
+
+bb1:                                              ; preds = %bb3
+  br i1 undef, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %t = add i16 %t4, 1                         ; <i16> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ]      ; <i16> [#uses=3]
+  br label %bb1
+
+; CHECK: bb10:
+; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0
+; Hoist %t2 computation outside the loop.
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8, i8 addrspace(1)* undef, i16 %t4
+; CHECK-NEXT: br label %bb14
+bb10:                                             ; preds = %bb1
+  %t7 = icmp eq i16 %t4, 0                    ; <i1> [#uses=1]
+  %t3 = add i16 %t4, 16                     ; <i16> [#uses=1]
+  br label %bb14
+
+; CHECK: bb14:
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
+; CHECK-NEXT: %t6 = load float addrspace(1)*, float addrspace(1)* addrspace(1)* undef
+; Fold %t3's add within the address.
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float, float addrspace(1)* %t6, i16 4
+; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
+; Use the induction variable (%t4) to access the right element
+; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8, i8 addrspace(1)* [[SCEVGEP2]], i16 %t4
+; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]]
+; CHECK-NEXT: br label %bb14
+bb14:                                             ; preds = %bb14, %bb10
+  %t2 = getelementptr inbounds i8, i8 addrspace(1)* undef, i16 %t4 ; <i8 addrspace(1)*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t2
+  %t6 = load float addrspace(1)*, float addrspace(1)* addrspace(1)* undef
+  %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)*              ; <i8 addrspace(1)*> [#uses=1]
+  %t9 = getelementptr inbounds i8, i8 addrspace(1)* %t8, i16 %t3 ; <i8 addrspace(1)*> [#uses=1]
+  store i8 undef, i8 addrspace(1)* %t9
+  br label %bb14
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/uglygep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/uglygep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/uglygep.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/uglygep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,122 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
+; should be able to form pretty GEPs.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+define void @Z4() nounwind {
+; CHECK-LABEL: define void @Z4(
+bb:
+  br label %bb3
+
+bb1:                                              ; preds = %bb3
+  br i1 undef, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %t = add i64 %t4, 1                         ; <i64> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %t4 = phi i64 [ %t, %bb2 ], [ 0, %bb ]      ; <i64> [#uses=3]
+  br label %bb1
+
+; CHECK: bb10:
+; CHECK-NEXT: %t7 = icmp eq i64 %t4, 0
+; Hoist %t2 computation outside the loop.
+; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8, i8* undef, i64 %t4
+; CHECK-NEXT: br label %bb14
+bb10:                                             ; preds = %bb1
+  %t7 = icmp eq i64 %t4, 0                    ; <i1> [#uses=1]
+  %t3 = add i64 %t4, 16                     ; <i64> [#uses=1]
+  br label %bb14
+
+; CHECK: bb14:
+; CHECK-NEXT: store i8 undef, i8* [[SCEVGEP]]
+; CHECK-NEXT: %t6 = load float*, float** undef
+; Fold %t3's add within the address.
+; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float, float* %t6, i64 4
+; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float* [[SCEVGEP1]] to i8*
+; Use the induction variable (%t4) to access the right element
+; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8, i8* [[SCEVGEP2]], i64 %t4
+; CHECK-NEXT: store i8 undef, i8* [[ADDRESS]]
+; CHECK-NEXT: br label %bb14
+bb14:                                             ; preds = %bb14, %bb10
+  %t2 = getelementptr inbounds i8, i8* undef, i64 %t4 ; <i8*> [#uses=1]
+  store i8 undef, i8* %t2
+  %t6 = load float*, float** undef
+  %t8 = bitcast float* %t6 to i8*              ; <i8*> [#uses=1]
+  %t9 = getelementptr inbounds i8, i8* %t8, i64 %t3 ; <i8*> [#uses=1]
+  store i8 undef, i8* %t9
+  br label %bb14
+}
+
+define fastcc void @TransformLine() nounwind {
+; CHECK-LABEL: @TransformLine(
+bb:
+  br label %loop0
+
+; CHECK: loop0:
+; Induction variable is initialized to -2.
+; CHECK-NEXT: [[PHIIV:%[^ ]+]] = phi i32 [ [[IVNEXT:%[^ ]+]], %loop0 ], [ -2, %bb ]
+; CHECK-NEXT: [[IVNEXT]] = add nuw nsw i32 [[PHIIV]], 1
+; CHECK-NEXT: br i1 false, label %loop0, label %bb0
+loop0:                                            ; preds = %loop0, %bb
+  %i0 = phi i32 [ %i0.next, %loop0 ], [ 0, %bb ]  ; <i32> [#uses=2]
+  %i0.next = add i32 %i0, 1                       ; <i32> [#uses=1]
+  br i1 false, label %loop0, label %bb0
+
+bb0:                                              ; preds = %loop0
+  br label %loop1
+
+; CHECK: loop1:
+; CHECK-NEXT: %i1 = phi i32 [ 0, %bb0 ], [ %i1.next, %bb5 ]
+; IVNEXT covers the uses of %i0 and %t0.
+; Therefore, %t0 has been removed.
+; The critical edge loop1 -> bb6 has been split.
+; CHECK-NEXT: br i1 false, label %bb2, label %[[LOOP1BB6:.+]]
+loop1:                                            ; preds = %bb5, %bb0
+  %i1 = phi i32 [ 0, %bb0 ], [ %i1.next, %bb5 ]   ; <i32> [#uses=4]
+  %t0 = add i32 %i0, %i1                          ; <i32> [#uses=1]
+  br i1 false, label %bb2, label %bb6
+
+; CHECK: bb2:
+; The critical edge bb2 -> bb6 has been split.
+; CHECK-NEXT: br i1 true, label %[[BB2BB6:[^,]+]], label %bb5
+bb2:                                              ; preds = %loop1
+  br i1 true, label %bb6, label %bb5
+
+; CHECK: bb5:
+; CHECK-NEXT: %i1.next = add i32 %i1, 1
+; CHECK-NEXT: br i1 true, label %[[BB5BB6:[^,]+]], label %loop1
+bb5:                                              ; preds = %bb2
+  %i1.next = add i32 %i1, 1                       ; <i32> [#uses=1]
+  br i1 true, label %bb6, label %loop1
+
+; bb5 to bb6 split basic block.
+; CHECK: [[BB5BB6]]:
+; CHECK-NEXT: [[INITIALVAL:%[^ ]+]] = add i32 [[IVNEXT]], %i1.next
+; CHECK-NEXT: br label %[[SPLITTOBB6:.+]]
+
+; bb2 to bb6 split basic block.
+; CHECK: [[BB2BB6]]:
+; CHECK-NEXT: br label %[[SPLITTOBB6]]
+
+; Split basic blocks to bb6.
+; CHECK: [[SPLITTOBB6]]:
+; CHECK-NEXT: [[INITP8:%[^ ]+]] = phi i32 [ [[INITIALVAL]], %[[BB5BB6]] ], [ undef, %[[BB2BB6]] ]
+; CHECK-NEXT: [[INITP9:%[^ ]+]] = phi i32 [ undef, %[[BB5BB6]] ], [ %i1, %[[BB2BB6]] ]
+; CHECK-NEXT: br label %bb6
+  
+; CHECK: [[LOOP1BB6]]:
+; CHECK-NEXT: br label %bb6
+
+; CHECK: bb6:
+; CHECK-NEXT: %p8 = phi i32 [ undef, %[[LOOP1BB6]] ], [ [[INITP8]], %[[SPLITTOBB6]] ]
+; CHECK-NEXT: %p9 = phi i32 [ %i1, %[[LOOP1BB6]] ], [ [[INITP9]], %[[SPLITTOBB6]] ]
+; CHECK-NEXT: unreachable
+bb6:                                              ; preds = %bb5, %bb2, %loop1
+  %p8 = phi i32 [ %t0, %bb5 ], [ undef, %loop1 ], [ undef, %bb2 ] ; <i32> [#uses=0]
+  %p9 = phi i32 [ undef, %bb5 ], [ %i1, %loop1 ], [ %i1, %bb2 ] ; <i32> [#uses=0]
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -loop-reduce -S | \
+; RUN:   grep "add i32 %indvar630.ui, 1"
+;
+; Make sure that the use of the IV outside of the loop (the store) uses the 
+; post incremented value of the IV, not the preincremented value.  This 
+; prevents the loop from having to keep the post and pre-incremented value
+; around for the duration of the loop, adding a copy and an extra register
+; to the loop.
+
+declare i1 @pred(i32)
+
+define void @test([700 x i32]* %nbeaux_.0__558, i32* %i_.16574) {
+then.0:
+	br label %no_exit.2
+no_exit.2:		; preds = %no_exit.2, %then.0
+	%indvar630.ui = phi i32 [ 0, %then.0 ], [ %indvar.next631, %no_exit.2 ]		; <i32> [#uses=3]
+	%indvar630 = bitcast i32 %indvar630.ui to i32		; <i32> [#uses=2]
+	%gep.upgrd.1 = zext i32 %indvar630.ui to i64		; <i64> [#uses=1]
+	%tmp.38 = getelementptr [700 x i32], [700 x i32]* %nbeaux_.0__558, i32 0, i64 %gep.upgrd.1		; <i32*> [#uses=1]
+	store i32 0, i32* %tmp.38
+	%inc.2 = add i32 %indvar630, 2		; <i32> [#uses=1]
+	%tmp.34 = call i1 @pred( i32 %indvar630 )		; <i1> [#uses=1]
+	%indvar.next631 = add i32 %indvar630.ui, 1		; <i32> [#uses=1]
+	br i1 %tmp.34, label %no_exit.2, label %loopexit.2.loopexit
+loopexit.2.loopexit:		; preds = %no_exit.2
+	store i32 %inc.2, i32* %i_.16574		; the out-of-loop use of the IV-derived value %inc.2
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; Base should not be i*3, it should be i*2.
+; RUN: opt < %s -loop-reduce -S | \
+; RUN:   not grep "mul.*%i, 3"
+
+; Indvar should not start at zero:
+; RUN: opt < %s -loop-reduce -S | \
+; RUN:   not grep "phi i32 .* 0"
+; END.
+
+; mul uint %i, 3
+
+target datalayout = "e-p:32:32-n32"
+target triple = "i686-apple-darwin8"
+@flags2 = external global [8193 x i8], align 32		; <[8193 x i8]*> [#uses=1]
+
+define void @foo(i32 %k, i32 %i.s) {
+entry:
+	%i = bitcast i32 %i.s to i32		; <i32> [#uses=2]
+	%k_addr.012 = shl i32 %i.s, 1		; <i32> [#uses=1]
+	%tmp14 = icmp sgt i32 %k_addr.012, 8192		; <i1> [#uses=1]
+	br i1 %tmp14, label %return, label %bb.preheader
+bb.preheader:		; preds = %entry
+	%tmp. = shl i32 %i, 1		; <i32> [#uses=1]
+	br label %bb
+bb:		; preds = %bb, %bb.preheader
+	%indvar = phi i32 [ %indvar.next, %bb ], [ 0, %bb.preheader ]		; <i32> [#uses=2]
+	%tmp.15 = mul i32 %indvar, %i		; <i32> [#uses=1] indvar scaled by the variable stride %i
+	%tmp.16 = add i32 %tmp.15, %tmp.		; <i32> [#uses=2]
+	%k_addr.0.0 = bitcast i32 %tmp.16 to i32		; <i32> [#uses=1]
+	%gep.upgrd.1 = zext i32 %tmp.16 to i64		; <i64> [#uses=1]
+	%tmp = getelementptr [8193 x i8], [8193 x i8]* @flags2, i32 0, i64 %gep.upgrd.1		; <i8*> [#uses=1]
+	store i8 0, i8* %tmp
+	%k_addr.0 = add i32 %k_addr.0.0, %i.s		; <i32> [#uses=1]
+	%tmp.upgrd.2 = icmp sgt i32 %k_addr.0, 8192		; <i1> [#uses=1] the compare that uses the strided value
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
+	br i1 %tmp.upgrd.2, label %return.loopexit, label %bb
+return.loopexit:		; preds = %bb
+	br label %return
+return:		; preds = %return.loopexit, %entry
+	ret void
+}

Added: llvm/trunk/test/Transforms/LoopStrengthReduce/variable_stride.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/variable_stride.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/variable_stride.ll (added)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/variable_stride.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; Check that variable strides are reduced to adds instead of multiplies.
+; RUN: opt < %s -loop-reduce -S | not grep mul
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+declare i1 @pred(i32)
+
+define void @test([10000 x i32]* %P, i32 %STRIDE) {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%INDVAR = phi i32 [ 0, %0 ], [ %INDVAR2, %Loop ]		; <i32> [#uses=2]
+	%Idx = mul i32 %INDVAR, %STRIDE		; <i32> [#uses=1] IV times the variable stride; expected to become an add
+	%cond = call i1 @pred( i32 %Idx )		; <i1> [#uses=1]
+	%INDVAR2 = add i32 %INDVAR, 1		; <i32> [#uses=1]
+	br i1 %cond, label %Loop, label %Out
+Out:		; preds = %Loop
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopTransformWarning/distribution-remarks-missed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopTransformWarning/distribution-remarks-missed.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopTransformWarning/distribution-remarks-missed.ll (added)
+++ llvm/trunk/test/Transforms/LoopTransformWarning/distribution-remarks-missed.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,99 @@
+; Legacy pass manager
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+; New pass manager
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+
+; CHECK: warning: source.cpp:19:5: loop not distributed: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; YAML:     --- !Failure
+; YAML-NEXT: Pass:            transform-warning
+; YAML-NEXT: Name:            FailedRequestedDistribution
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 19, Column: 5 }
+; YAML-NEXT: Function:        _Z17test_array_boundsPiS_i
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not distributed: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering'
+; YAML-NEXT: ...
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) !dbg !8 {
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+  br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body, !dbg !35
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+  %idxprom1 = sext i32 %0 to i64, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !50
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void, !dbg !36
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 16, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.5.0"}
+!12 = !DILocation(line: 3, column: 8, scope: !13)
+!13 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!16 = !DILocation(line: 4, column: 5, scope: !17)
+!17 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !13)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 5, column: 9, scope: !23)
+!23 = distinct !DILexicalBlock(line: 5, column: 9, file: !1, scope: !17)
+!24 = !DILocation(line: 8, column: 1, scope: !4)
+!25 = !DILocation(line: 12, column: 8, scope: !26)
+!26 = distinct !DILexicalBlock(line: 12, column: 3, file: !1, scope: !7)
+!30 = !DILocation(line: 13, column: 5, scope: !26)
+!31 = !DILocation(line: 14, column: 1, scope: !7)
+!32 = !DILocation(line: 18, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(line: 18, column: 3, file: !1, scope: !8)
+!35 = !DILocation(line: 19, column: 5, scope: !33)
+!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, retainedNodes: !2)
+
+!50 = !{!50, !{!"llvm.loop.distribute.enable"}}

Added: llvm/trunk/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll (added)
+++ llvm/trunk/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,33 @@
+; RUN: opt -transform-warning -disable-output < %s 2>&1 | FileCheck -allow-empty %s
+;
+; llvm.org/PR40546
+; Do not warn about leftover llvm.loop.vectorize.enable for already
+; vectorized loops.
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i32 %n) {
+entry:
+  %cmp = icmp eq i32 %n, 0
+  br i1 %cmp, label %simd.if.end, label %omp.inner.for.body.preheader
+
+omp.inner.for.body.preheader:                     ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %omp.inner.for.body
+
+omp.inner.for.body:                               ; preds = %omp.inner.for.body, %omp.inner.for.body.preheader
+  %indvars.iv = phi i64 [ 0, %omp.inner.for.body.preheader ], [ %indvars.iv.next, %omp.inner.for.body ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %simd.if.end, label %omp.inner.for.body, !llvm.loop !0
+
+simd.if.end:                                      ; preds = %omp.inner.for.body, %entry
+  ret void
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = !{!"llvm.loop.isvectorized"}
+
+
+; CHECK-NOT: loop not vectorized

Added: llvm/trunk/test/Transforms/LoopTransformWarning/optnone.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopTransformWarning/optnone.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopTransformWarning/optnone.ll (added)
+++ llvm/trunk/test/Transforms/LoopTransformWarning/optnone.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; Legacy pass manager
+; RUN: opt -transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning < %s 2>&1 | FileCheck -allow-empty %s
+;
+; New pass manager
+; RUN: opt -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning < %s 2>&1 | FileCheck -allow-empty %s
+;
+; Verify that no transformation warnings are emitted for functions with
+; 'optnone' attribute.
+;
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @func(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 {
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0
+  br i1 %cmp9, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %0 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %1, i32* %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !0
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
+attributes #0 = { noinline optnone }
+
+!0 = distinct !{!0, !1, !2, !3, !4} ; loop ID: attaches every transformation hint defined below
+!1 = !{!"llvm.loop.unroll.enable"}
+!2 = !{!"llvm.loop.distribute.enable"}
+!3 = !{!"llvm.loop.unroll_and_jam.enable"}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+
+; CHECK-NOT: warning

Added: llvm/trunk/test/Transforms/LoopTransformWarning/unrollandjam-remarks-missed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopTransformWarning/unrollandjam-remarks-missed.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopTransformWarning/unrollandjam-remarks-missed.ll (added)
+++ llvm/trunk/test/Transforms/LoopTransformWarning/unrollandjam-remarks-missed.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,99 @@
+; Legacy pass manager
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+; New pass manager
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+
+; CHECK: warning: source.cpp:19:5: loop not unroll-and-jammed: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; YAML:     --- !Failure
+; YAML-NEXT: Pass:            transform-warning
+; YAML-NEXT: Name:            FailedRequestedUnrollAndJamming
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 19, Column: 5 }
+; YAML-NEXT: Function:        _Z17test_array_boundsPiS_i
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not unroll-and-jammed: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering'
+; YAML-NEXT: ...
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) !dbg !8 {
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+  br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body, !dbg !35
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+  %idxprom1 = sext i32 %0 to i64, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !50
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void, !dbg !36
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 16, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.5.0"}
+!12 = !DILocation(line: 3, column: 8, scope: !13)
+!13 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!16 = !DILocation(line: 4, column: 5, scope: !17)
+!17 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !13)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 5, column: 9, scope: !23)
+!23 = distinct !DILexicalBlock(line: 5, column: 9, file: !1, scope: !17)
+!24 = !DILocation(line: 8, column: 1, scope: !4)
+!25 = !DILocation(line: 12, column: 8, scope: !26)
+!26 = distinct !DILexicalBlock(line: 12, column: 3, file: !1, scope: !7)
+!30 = !DILocation(line: 13, column: 5, scope: !26)
+!31 = !DILocation(line: 14, column: 1, scope: !7)
+!32 = !DILocation(line: 18, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(line: 18, column: 3, file: !1, scope: !8)
+!35 = !DILocation(line: 19, column: 5, scope: !33)
+!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, retainedNodes: !2)
+
+!50 = !{!50, !{!"llvm.loop.unroll_and_jam.enable"}}

Added: llvm/trunk/test/Transforms/LoopTransformWarning/unrolling-remarks-missed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopTransformWarning/unrolling-remarks-missed.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopTransformWarning/unrolling-remarks-missed.ll (added)
+++ llvm/trunk/test/Transforms/LoopTransformWarning/unrolling-remarks-missed.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,99 @@
+; Legacy pass manager
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+; New pass manager
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+
+; CHECK: warning: source.cpp:19:5: loop not unrolled: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; YAML:     --- !Failure
+; YAML-NEXT: Pass:            transform-warning
+; YAML-NEXT: Name:            FailedRequestedUnrolling
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 19, Column: 5 }
+; YAML-NEXT: Function:        _Z17test_array_boundsPiS_i
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not unrolled: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering'
+; YAML-NEXT: ...
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) !dbg !8 { ; A[i] = A[B[i]] for i in [0, %Length)
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+  br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32 ; skip the loop when %Length <= 0
+
+for.body.preheader:                          ; preds = %entry
+  br label %for.body, !dbg !35
+
+for.body:                                    ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18 ; %0 = B[i]
+  %idxprom1 = sext i32 %0 to i64, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18 ; %1 = A[B[i]]
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18 ; A[i] = %1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !50 ; latch; !50 is the loop ID that requests unrolling
+
+for.end.loopexit:                            ; preds = %for.body
+  br label %for.end
+
+for.end:                                      ; preds = %for.end.loopexit, %entry
+  ret void, !dbg !36
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2) ; not attached to any function defined in this file
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, retainedNodes: !2) ; not attached to any function defined in this file
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 16, file: !1, scope: !5, type: !6, retainedNodes: !2) ; attached to @_Z17test_array_boundsPiS_i via !dbg
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.5.0"}
+!12 = !DILocation(line: 3, column: 8, scope: !13)
+!13 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!16 = !DILocation(line: 4, column: 5, scope: !17)
+!17 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !13)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 5, column: 9, scope: !23)
+!23 = distinct !DILexicalBlock(line: 5, column: 9, file: !1, scope: !17)
+!24 = !DILocation(line: 8, column: 1, scope: !4)
+!25 = !DILocation(line: 12, column: 8, scope: !26)
+!26 = distinct !DILexicalBlock(line: 12, column: 3, file: !1, scope: !7)
+!30 = !DILocation(line: 13, column: 5, scope: !26)
+!31 = !DILocation(line: 14, column: 1, scope: !7)
+!32 = !DILocation(line: 18, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(line: 18, column: 3, file: !1, scope: !8)
+!35 = !DILocation(line: 19, column: 5, scope: !33) ; location of the loop body instructions; the expected warning reports 19:5
+!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, retainedNodes: !2)
+
+!50 = !{!50, !{!"llvm.loop.unroll.enable"}} ; self-referential loop ID used on the %for.body latch branch; requests unrolling

Added: llvm/trunk/test/Transforms/LoopTransformWarning/vectorization-remarks-missed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopTransformWarning/vectorization-remarks-missed.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopTransformWarning/vectorization-remarks-missed.ll (added)
+++ llvm/trunk/test/Transforms/LoopTransformWarning/vectorization-remarks-missed.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,113 @@
+; Legacy pass manager
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+; New pass manager
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+
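+; C/C++ code for tests (NOTE: this snippet appears to correspond to DISubprogram "test" (!4); the only function defined below is test_array_bounds)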
+; void test(int *A, int Length) {
+; #pragma clang loop vectorize(enable) interleave(enable)
+;   for (int i = 0; i < Length; i++) {
+;     A[i] = i;
+;     if (A[i] > Length)
+;       break;
+;   }
+; }
+; File, line, and column should match those specified in the metadata
+; CHECK: warning: source.cpp:19:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; YAML:     --- !Failure
+; YAML-NEXT: Pass:            transform-warning
+; YAML-NEXT: Name:            FailedRequestedVectorization
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 19, Column: 5 }
+; YAML-NEXT: Function:        _Z17test_array_boundsPiS_i
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering'
+; YAML-NEXT: ...
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) !dbg !8 { ; A[i] = A[B[i]] for i in [0, %Length)
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+  br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34 ; skip the loop when %Length <= 0; carries the same !llvm.loop node as the latch
+
+for.body.preheader:                          ; preds = %entry
+  br label %for.body, !dbg !35
+
+for.body:                                    ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18 ; %0 = B[i]
+  %idxprom1 = sext i32 %0 to i64, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18 ; %1 = A[B[i]]
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18 ; A[i] = %1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !34 ; latch; !34 is the loop ID that enables vectorization
+
+for.end.loopexit:                            ; preds = %for.body
+  br label %for.end
+
+for.end:                                      ; preds = %for.end.loopexit, %entry
+  ret void, !dbg !36
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2) ; not attached to any function defined in this file
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, retainedNodes: !2) ; not attached to any function defined in this file
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 16, file: !1, scope: !5, type: !6, retainedNodes: !2) ; attached to @_Z17test_array_boundsPiS_i via !dbg
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.5.0"}
+!12 = !DILocation(line: 3, column: 8, scope: !13)
+!13 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!14 = !{!14, !15, !15} ; loop ID not referenced by any instruction in this file
+!15 = !{!"llvm.loop.vectorize.enable", i1 true} ; the vectorization request shared by !14 and !34
+!16 = !DILocation(line: 4, column: 5, scope: !17)
+!17 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !13)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 5, column: 9, scope: !23)
+!23 = distinct !DILexicalBlock(line: 5, column: 9, file: !1, scope: !17)
+!24 = !DILocation(line: 8, column: 1, scope: !4)
+!25 = !DILocation(line: 12, column: 8, scope: !26)
+!26 = distinct !DILexicalBlock(line: 12, column: 3, file: !1, scope: !7)
+!27 = !{!27, !28, !29} ; loop ID not referenced by any instruction in this file
+!28 = !{!"llvm.loop.interleave.count", i32 1}
+!29 = !{!"llvm.loop.vectorize.width", i32 1}
+!30 = !DILocation(line: 13, column: 5, scope: !26)
+!31 = !DILocation(line: 14, column: 1, scope: !7)
+!32 = !DILocation(line: 18, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(line: 18, column: 3, file: !1, scope: !8)
+!34 = !{!34, !15} ; self-referential loop ID used by @_Z17test_array_boundsPiS_i; enables vectorization
+!35 = !DILocation(line: 19, column: 5, scope: !33) ; location of the loop body instructions; the expected warning reports 19:5
+!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, retainedNodes: !2)

Added: llvm/trunk/test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,14 @@
+; RUN: opt < %s -loop-unroll -disable-output
+
+define i32 @main() {		; single-block counted loop with a constant trip count of 2^31
+entry:
+	br label %no_exit
+no_exit:		; preds = %no_exit, %entry
+	%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ]		; <i32> [#uses=1]
+	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
+	%exitcond = icmp ne i32 %indvar.next, -2147483648		; <i1> [#uses=1]
+	br i1 %exitcond, label %no_exit, label %loopexit		; keep looping until the counter wraps to -2147483648
+loopexit:		; preds = %no_exit
+	ret i32 0
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt < %s -loop-unroll -loop-simplify -disable-output
+
+define void @print_board() {		; two-level loop nest: outer header %no_exit.1, inner loop %no_exit.2
+entry:
+	br label %no_exit.1
+no_exit.1:		; preds = %cond_false.2, %entry
+	br label %no_exit.2
+no_exit.2:		; preds = %no_exit.2, %no_exit.1
+	%indvar1 = phi i32 [ 0, %no_exit.1 ], [ %indvar.next2, %no_exit.2 ]		; <i32> [#uses=1]
+	%indvar.next2 = add i32 %indvar1, 1		; <i32> [#uses=2]
+	%exitcond3 = icmp ne i32 %indvar.next2, 7		; <i1> [#uses=1]
+	br i1 %exitcond3, label %no_exit.2, label %loopexit.2		; inner loop runs 7 iterations
+loopexit.2:		; preds = %no_exit.2
+	br i1 false, label %cond_true.2, label %cond_false.2
+cond_true.2:		; preds = %loopexit.2
+	ret void
+cond_false.2:		; preds = %loopexit.2
+	br i1 false, label %no_exit.1, label %loopexit.1		; outer back edge, guarded by a constant-false condition
+loopexit.1:		; preds = %cond_false.2
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; RUN: opt < %s -loop-unroll -S | grep bb72.2
+
+define void @vorbis_encode_noisebias_setup() {		; counted loop whose body spans two blocks (%cond_true.outer and %bb72)
+entry:
+	br label %cond_true.outer
+cond_true.outer:		; preds = %bb72, %entry
+	%indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]		; <i32> [#uses=1]
+	br label %bb72
+bb72:		; preds = %cond_true.outer
+	%indvar.next2 = add i32 %indvar1.ph, 1		; <i32> [#uses=2]
+	%exitcond3 = icmp eq i32 %indvar.next2, 3		; <i1> [#uses=1]
+	br i1 %exitcond3, label %cond_true138, label %cond_true.outer		; exit after 3 iterations, otherwise take the back edge
+cond_true138:		; preds = %bb72
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,17 @@
+; PR 1334
+; RUN: opt < %s -loop-unroll -disable-output
+
+define void @sal__math_float_manipulator_7__math__joint_array_dcv_ops__Omultiply__3([6 x float]* %agg.result) {		; counted loop whose exit block holds a phi of a value defined before the loop
+entry:
+	%tmp282911 = zext i8 0 to i32		; <i32> [#uses=1]
+	br label %cond_next
+cond_next:		; preds = %cond_next, %entry
+	%indvar = phi i8 [ 0, %entry ], [ %indvar.next, %cond_next ]		; <i8> [#uses=1]
+	%indvar.next = add i8 %indvar, 1		; <i8> [#uses=2]
+	%exitcond = icmp eq i8 %indvar.next, 7		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb27, label %cond_next		; exit after 7 iterations
+bb27:		; preds = %cond_next
+	%tmp282911.lcssa = phi i32 [ %tmp282911, %cond_next ]		; <i32> [#uses=0]
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -loop-unroll -S | not grep undef
+; PR1385
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+        %struct.__mpz_struct = type { i32, i32, i32* }
+
+
+define void @Foo(%struct.__mpz_struct* %base) {         ; two-iteration loop; a pointer phi changes value between the first and second iteration
+entry:
+        %want = alloca [1 x %struct.__mpz_struct], align 16             ; <[1 x %struct.__mpz_struct]*> [#uses=4]
+        %want1 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0          ; <%struct.__mpz_struct*> [#uses=1]
+        call void @__gmpz_init( %struct.__mpz_struct* %want1 )
+        %want27 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0         ; <%struct.__mpz_struct*> [#uses=1]
+        %want3 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0          ; <%struct.__mpz_struct*> [#uses=1]
+        %want2 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0          ; <%struct.__mpz_struct*> [#uses=2]
+        br label %bb
+
+bb:             ; preds = %bb, %entry
+        %i.01.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]          ; <i32> [#uses=1]
+        %want23.0 = phi %struct.__mpz_struct* [ %want27, %entry ], [ %want2, %bb ]              ; <%struct.__mpz_struct*> [#uses=1]
+        call void @__gmpz_mul( %struct.__mpz_struct* %want23.0, %struct.__mpz_struct* %want3, %struct.__mpz_struct* %base )
+        %indvar.next = add i32 %i.01.0, 1               ; <i32> [#uses=2]
+        %exitcond = icmp ne i32 %indvar.next, 2         ; <i1> [#uses=1]
+        br i1 %exitcond, label %bb, label %bb10         ; loop body executes twice
+
+bb10:           ; preds = %bb
+        %want2.lcssa = phi %struct.__mpz_struct* [ %want2, %bb ]                ; <%struct.__mpz_struct*> [#uses=1]
+        call void @__gmpz_clear( %struct.__mpz_struct* %want2.lcssa )
+        ret void
+}
+
+declare void @__gmpz_init(%struct.__mpz_struct*)
+declare void @__gmpz_mul(%struct.__mpz_struct*, %struct.__mpz_struct*, %struct.__mpz_struct*)
+declare void @__gmpz_clear(%struct.__mpz_struct*)
+

Added: llvm/trunk/test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt < %s -loop-unroll -unroll-count=3 -S | grep bb72.2
+
+define void @foo(i32 %trips) {		; two-block loop whose trip count is the runtime value %trips
+entry:
+	br label %cond_true.outer
+
+cond_true.outer:		; preds = %bb72, %entry
+	%indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]
+	br label %bb72
+
+bb72:		; preds = %cond_true.outer
+	%indvar.next2 = add i32 %indvar1.ph, 1
+	%exitcond3 = icmp eq i32 %indvar.next2, %trips
+	br i1 %exitcond3, label %cond_true138, label %cond_true.outer		; exit once the counter equals %trips
+
+cond_true138:		; preds = %bb72
+	ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/2007-11-05-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2007-11-05-Crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2007-11-05-Crash.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2007-11-05-Crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,295 @@
+; RUN: opt < %s -disable-output -loop-unroll
+; PR1770
+; PR1947
+
+	%struct.cl_engine = type { i32, i16, i32, i8**, i8**, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
+	%struct.cl_limits = type { i32, i32, i32, i32, i16, i64 }
+	%struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+	%struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+	%struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+	%struct.cli_bm_patt = type { i8*, i32, i8*, i8*, i8, %struct.cli_bm_patt* }
+	%struct.cli_ctx = type { i8**, i64*, %struct.cli_matcher*, %struct.cl_engine*, %struct.cl_limits*, i32, i32, i32, i32, %struct.cli_dconf* }
+	%struct.cli_dconf = type { i32, i32, i32, i32, i32, i32, i32 }
+	%struct.cli_matcher = type { i16, i8, i32*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+declare i8* @calloc(i64, i64)
+
+define fastcc i32 @cli_scanpe(i32 %desc, %struct.cli_ctx* %ctx) {		; long chain of constant-false branches leading to the loop %cond_true784.i / %cond_next811.i
+entry:
+	br i1 false, label %cond_next17, label %cond_true14
+
+cond_true14:		; preds = %entry
+	ret i32 0
+
+cond_next17:		; preds = %entry
+	br i1 false, label %LeafBlock, label %LeafBlock1250
+
+LeafBlock1250:		; preds = %cond_next17
+	ret i32 0
+
+LeafBlock:		; preds = %cond_next17
+	br i1 false, label %cond_next33, label %cond_true30
+
+cond_true30:		; preds = %LeafBlock
+	ret i32 0
+
+cond_next33:		; preds = %LeafBlock
+	br i1 false, label %cond_next90, label %cond_true42
+
+cond_true42:		; preds = %cond_next33
+	ret i32 0
+
+cond_next90:		; preds = %cond_next33
+	br i1 false, label %cond_next100, label %cond_true97
+
+cond_true97:		; preds = %cond_next90
+	ret i32 0
+
+cond_next100:		; preds = %cond_next90
+	br i1 false, label %cond_next109, label %cond_true106
+
+cond_true106:		; preds = %cond_next100
+	ret i32 0
+
+cond_next109:		; preds = %cond_next100
+	br i1 false, label %cond_false, label %cond_true118
+
+cond_true118:		; preds = %cond_next109
+	ret i32 0
+
+cond_false:		; preds = %cond_next109
+	br i1 false, label %NodeBlock1482, label %cond_true126
+
+cond_true126:		; preds = %cond_false
+	ret i32 0
+
+NodeBlock1482:		; preds = %cond_false
+	br i1 false, label %cond_next285, label %NodeBlock1480
+
+NodeBlock1480:		; preds = %NodeBlock1482
+	ret i32 0
+
+cond_next285:		; preds = %NodeBlock1482
+	br i1 false, label %cond_next320, label %cond_true294
+
+cond_true294:		; preds = %cond_next285
+	ret i32 0
+
+cond_next320:		; preds = %cond_next285
+	br i1 false, label %LeafBlock1491, label %LeafBlock1493
+
+LeafBlock1493:		; preds = %cond_next320
+	ret i32 0
+
+LeafBlock1491:		; preds = %cond_next320
+	br i1 false, label %cond_true400, label %cond_true378
+
+cond_true378:		; preds = %LeafBlock1491
+	ret i32 1
+
+cond_true400:		; preds = %LeafBlock1491
+	br i1 false, label %cond_next413, label %cond_true406
+
+cond_true406:		; preds = %cond_true400
+	ret i32 0
+
+cond_next413:		; preds = %cond_true400
+	br i1 false, label %cond_next429, label %cond_true424
+
+cond_true424:		; preds = %cond_next413
+	ret i32 0
+
+cond_next429:		; preds = %cond_next413
+	br i1 false, label %NodeBlock1557, label %NodeBlock1579
+
+NodeBlock1579:		; preds = %cond_next429
+	ret i32 0
+
+NodeBlock1557:		; preds = %cond_next429
+	br i1 false, label %LeafBlock1543, label %NodeBlock1555
+
+NodeBlock1555:		; preds = %NodeBlock1557
+	ret i32 0
+
+LeafBlock1543:		; preds = %NodeBlock1557
+	br i1 false, label %cond_next870, label %cond_next663
+
+cond_next663:		; preds = %LeafBlock1543
+	ret i32 0
+
+cond_next870:		; preds = %LeafBlock1543
+	br i1 false, label %cond_true1012, label %cond_true916
+
+cond_true916:		; preds = %cond_next870
+	ret i32 0
+
+cond_true1012:		; preds = %cond_next870
+	br i1 false, label %cond_next3849, label %cond_true2105
+
+cond_true2105:		; preds = %cond_true1012
+	ret i32 0
+
+cond_next3849:		; preds = %cond_true1012
+	br i1 false, label %cond_next4378, label %bb6559
+
+bb3862:		; preds = %cond_next4385
+	br i1 false, label %cond_false3904, label %cond_true3876
+
+cond_true3876:		; preds = %bb3862
+	ret i32 0
+
+cond_false3904:		; preds = %bb3862
+	br i1 false, label %cond_next4003, label %cond_true3935
+
+cond_true3935:		; preds = %cond_false3904
+	ret i32 0
+
+cond_next4003:		; preds = %cond_false3904
+	br i1 false, label %cond_next5160, label %cond_next4015
+
+cond_next4015:		; preds = %cond_next4003
+	ret i32 0
+
+cond_next4378:		; preds = %cond_next3849
+	br i1 false, label %cond_next4385, label %bb4393
+
+cond_next4385:		; preds = %cond_next4378
+	br i1 false, label %bb3862, label %bb4393
+
+bb4393:		; preds = %cond_next4385, %cond_next4378
+	ret i32 0
+
+cond_next5160:		; preds = %cond_next4003
+	br i1 false, label %bb5188, label %bb6559
+
+bb5188:		; preds = %cond_next5160
+	br i1 false, label %cond_next5285, label %cond_true5210
+
+cond_true5210:		; preds = %bb5188
+	ret i32 0
+
+cond_next5285:		; preds = %bb5188
+	br i1 false, label %cond_true5302, label %cond_true5330
+
+cond_true5302:		; preds = %cond_next5285
+	br i1 false, label %bb7405, label %bb7367
+
+cond_true5330:		; preds = %cond_next5285
+	ret i32 0
+
+bb6559:		; preds = %cond_next5160, %cond_next3849
+	ret i32 0
+
+bb7367:		; preds = %cond_true5302
+	ret i32 0
+
+bb7405:		; preds = %cond_true5302
+	br i1 false, label %cond_next8154, label %cond_true7410
+
+cond_true7410:		; preds = %bb7405
+	ret i32 0
+
+cond_next8154:		; preds = %bb7405
+	br i1 false, label %cond_true8235, label %bb9065
+
+cond_true8235:		; preds = %cond_next8154
+	br i1 false, label %bb8274, label %bb8245
+
+bb8245:		; preds = %cond_true8235
+	ret i32 0
+
+bb8274:		; preds = %cond_true8235
+	br i1 false, label %cond_next8358, label %cond_true8295
+
+cond_true8295:		; preds = %bb8274
+	ret i32 0
+
+cond_next8358:		; preds = %bb8274
+	br i1 false, label %cond_next.i509, label %cond_true8371
+
+cond_true8371:		; preds = %cond_next8358
+	ret i32 -123
+
+cond_next.i509:		; preds = %cond_next8358
+	br i1 false, label %bb36.i, label %bb33.i
+
+bb33.i:		; preds = %cond_next.i509
+	ret i32 0
+
+bb36.i:		; preds = %cond_next.i509
+	br i1 false, label %cond_next54.i, label %cond_true51.i
+
+cond_true51.i:		; preds = %bb36.i
+	ret i32 0
+
+cond_next54.i:		; preds = %bb36.i
+	%tmp10.i.i527 = call i8* @calloc( i64 0, i64 1 )		; <i8*> [#uses=1]
+	br i1 false, label %cond_next11.i.i, label %bb132.i
+
+bb132.i:		; preds = %cond_next54.i
+	ret i32 0
+
+cond_next11.i.i:		; preds = %cond_next54.i
+	br i1 false, label %bb32.i.i545, label %cond_true1008.critedge.i
+
+bb32.i.i545:		; preds = %cond_next11.i.i
+	br i1 false, label %cond_next349.i, label %cond_true184.i
+
+cond_true184.i:		; preds = %bb32.i.i545
+	ret i32 0
+
+cond_next349.i:		; preds = %bb32.i.i545
+	br i1 false, label %cond_next535.i, label %cond_true1008.critedge1171.i
+
+cond_next535.i:		; preds = %cond_next349.i
+	br i1 false, label %cond_next569.i, label %cond_false574.i
+
+cond_next569.i:		; preds = %cond_next535.i
+	br i1 false, label %cond_next670.i, label %cond_true1008.critedge1185.i
+
+cond_false574.i:		; preds = %cond_next535.i
+	ret i32 0
+
+cond_next670.i:		; preds = %cond_next569.i
+	br i1 false, label %cond_true692.i, label %cond_next862.i
+
+cond_true692.i:		; preds = %cond_next670.i
+	br i1 false, label %cond_false742.i, label %cond_true718.i
+
+cond_true718.i:		; preds = %cond_true692.i
+	ret i32 0
+
+cond_false742.i:		; preds = %cond_true692.i
+	br i1 false, label %cond_true784.i, label %cond_next9079
+
+cond_true784.i:		; preds = %cond_next811.i, %cond_false742.i
+	%indvar1411.i.reg2mem.0 = phi i8 [ %indvar.next1412.i, %cond_next811.i ], [ 0, %cond_false742.i ]		; <i8> [#uses=1]
+	br i1 false, label %cond_true1008.critedge1190.i, label %cond_next811.i		; loop header doubles as a second exit (constant-false condition)
+
+cond_next811.i:		; preds = %cond_true784.i
+	%indvar.next1412.i = add i8 %indvar1411.i.reg2mem.0, 1		; <i8> [#uses=2]
+	%tmp781.i = icmp eq i8 %indvar.next1412.i, 3		; <i1> [#uses=1]
+	br i1 %tmp781.i, label %cond_next9079, label %cond_true784.i		; latch: exit when the i8 counter reaches 3, else back edge
+
+cond_next862.i:		; preds = %cond_next670.i
+	ret i32 0
+
+cond_true1008.critedge.i:		; preds = %cond_next11.i.i
+	ret i32 0
+
+cond_true1008.critedge1171.i:		; preds = %cond_next349.i
+	ret i32 0
+
+cond_true1008.critedge1185.i:		; preds = %cond_next569.i
+	ret i32 0
+
+cond_true1008.critedge1190.i:		; preds = %cond_true784.i
+	%tmp621.i532.lcssa610 = phi i8* [ %tmp10.i.i527, %cond_true784.i ]		; <i8*> [#uses=0]
+	ret i32 0
+
+bb9065:		; preds = %cond_next8154
+	ret i32 0
+
+cond_next9079:		; preds = %cond_next811.i, %cond_false742.i
+	ret i32 0
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,103 @@
+; RUN: opt < %s -loop-unroll -S -unroll-count=4 | FileCheck %s
+; Test phi update after partial unroll.
+
+declare i1 @check() nounwind
+
+; CHECK: @test
+; CHECK: if.else:
+; CHECK: if.then.loopexit
+; CHECK: %sub5.lcssa = phi i32 [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ]
+; CHECK: if.else.3
+define void @test1(i32 %i, i32 %j) nounwind uwtable ssp {  ; self-looping block %if.else feeding an exit phi in %if.then
+entry:
+  %cond1 = call zeroext i1 @check()
+  br i1 %cond1, label %if.then, label %if.else.lr.ph  ; bypass the loop when @check() returns true
+
+if.else.lr.ph:                                    ; preds = %entry
+  br label %if.else
+
+if.else:                                          ; preds = %if.else, %if.else.lr.ph
+  %sub = phi i32 [ %i, %if.else.lr.ph ], [ %sub5, %if.else ]
+  %sub5 = sub i32 %sub, %j  ; loop-carried value: starts at %i, reduced by %j every iteration
+  %cond2 = call zeroext i1 @check()
+  br i1 %cond2, label %if.then, label %if.else  ; exit edge carries %sub5 into %i.tr
+
+if.then:                                          ; preds = %if.else, %entry
+  %i.tr = phi i32 [ %i, %entry ], [ %sub5, %if.else ]  ; merges the bypass value with the value leaving the loop
+  ret void
+
+}
+
+; PR7318: assertion failure after doing a simple loop unroll
+;
+; CHECK-LABEL: @test2(
+; CHECK: bb1.bb2_crit_edge:
+; CHECK: %.lcssa = phi i32 [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ]
+; CHECK: bb1.3:
+define i32 @test2(i32* nocapture %p, i32 %n) nounwind readonly {  ; returns the sum of %p[0 .. %n-1], or 0 when %n <= 0
+entry:
+  %0 = icmp sgt i32 %n, 0                         ; <i1> [#uses=1]
+  br i1 %0, label %bb.nph, label %bb2  ; guard: enter the loop only for a positive %n
+
+bb.nph:                                           ; preds = %entry
+  %tmp = zext i32 %n to i64                       ; <i64> [#uses=1]
+  br label %bb
+
+bb:                                               ; preds = %bb.nph, %bb1
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; <i64> [#uses=2]
+  %s.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb1 ]    ; <i32> [#uses=1]
+  %scevgep = getelementptr i32, i32* %p, i64 %indvar   ; <i32*> [#uses=1]
+  %1 = load i32, i32* %scevgep, align 1                ; <i32> [#uses=1]
+  %2 = add nsw i32 %1, %s.01                      ; <i32> [#uses=2]
+  br label %bb1  ; loop body and latch are separate blocks
+
+bb1:                                              ; preds = %bb
+  %indvar.next = add i64 %indvar, 1               ; <i64> [#uses=2]
+  %exitcond = icmp ne i64 %indvar.next, %tmp      ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb, label %bb1.bb2_crit_edge  ; back edge while %indvar.next != zext(%n)
+
+bb1.bb2_crit_edge:                                ; preds = %bb1
+  %.lcssa = phi i32 [ %2, %bb1 ]                  ; <i32> [#uses=1]
+  br label %bb2
+
+bb2:                                              ; preds = %bb1.bb2_crit_edge, %entry
+  %s.0.lcssa = phi i32 [ %.lcssa, %bb1.bb2_crit_edge ], [ 0, %entry ] ; <i32> [#uses=1]
+  ret i32 %s.0.lcssa
+}
+
+; Check phi update for loop with an early-exit.
+;
+; CHECK-LABEL: @test3(
+; CHECK: return.loopexit:
+; CHECK: %tmp7.i.lcssa = phi i32 [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ]
+; CHECK: exit.3:
+define i32 @test3() nounwind uwtable ssp align 2 {  ; do-while loop with an early exit that carries a loaded value to %return
+entry:
+  %cond1 = call zeroext i1 @check()
+  br i1 %cond1, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  br label %do.body
+
+do.body:                                          ; preds = %do.cond, %if.end
+  %cond2 = call zeroext i1 @check()
+  br i1 %cond2, label %exit, label %do.cond
+
+exit:                  ; preds = %do.body
+  %tmp7.i = load i32, i32* undef, align 8  ; the value that escapes the loop through %land.lhs.true
+  br i1 undef, label %do.cond, label %land.lhs.true  ; despite its name, %exit can still continue the loop via %do.cond
+
+land.lhs.true:                                    ; preds = %exit
+  br i1 undef, label %return, label %do.cond  ; early-exit edge out of the loop
+
+do.cond:                                          ; preds = %land.lhs.true, %exit, %do.body
+  %cond3 = call zeroext i1 @check()
+  br i1 %cond3, label %do.end, label %do.body  ; latch: back edge to %do.body when @check() is false
+
+do.end:                                           ; preds = %do.cond
+  br label %return
+
+return:                                           ; preds = %do.end, %land.lhs.true, %entry
+  %retval.0 = phi i32 [ 0, %do.end ], [ 0, %entry ], [ %tmp7.i, %land.lhs.true ]
+  ret i32 %retval.0
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; RUN: opt -S < %s -loop-unroll -unroll-count=4 | FileCheck %s
+;
+; Test induction variable simplify after loop unrolling. It should
+; expose nice opportunities for GVN.
+;
+; The CHECK-NOT on while.body.1 below also ensures that loop unrolling (with
+; SCEV) removes unrolled loop exits given that 128 is a multiple of 4.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+; PR10534: LoopUnroll not keeping canonical induction variable...
+; CHECK: while.body:
+; CHECK-NOT: while.body.1:
+; CHECK: %shr.1 = lshr i32 %bit_addr.addr.01, 5
+; CHECK: %arrayidx.1 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.1
+; CHECK: %shr.2 = lshr i32 %bit_addr.addr.01, 5
+; CHECK: %arrayidx.2 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.2
+; CHECK: %shr.3 = lshr i32 %bit_addr.addr.01, 5
+; CHECK: %arrayidx.3 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.3
+define void @FlipBit(i32* nocapture %bitmap, i32 %bit_addr, i32 %nbits) nounwind {  ; toggles bits 0..127 of %bitmap one per iteration; %bit_addr and %nbits are not referenced in the body
+entry:
+  br label %while.body
+
+while.body:
+  %nbits.addr.02 = phi i32 [ 128, %entry ], [ %dec, %while.body ]  ; down-counter starting at the constant 128
+  %bit_addr.addr.01 = phi i32 [ 0, %entry ], [ %inc, %while.body ]  ; bit index, counts up from 0
+  %dec = add i32 %nbits.addr.02, -1
+  %shr = lshr i32 %bit_addr.addr.01, 5  ; word index = bit index / 32
+  %rem = and i32 %bit_addr.addr.01, 31  ; bit position inside the word
+  %shl = shl i32 1, %rem
+  %arrayidx = getelementptr inbounds i32, i32* %bitmap, i32 %shr
+  %tmp6 = load i32, i32* %arrayidx, align 4
+  %xor = xor i32 %tmp6, %shl  ; flip the selected bit
+  store i32 %xor, i32* %arrayidx, align 4
+  %inc = add i32 %bit_addr.addr.01, 1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %while.end, label %while.body  ; leave once the down-counter reaches 0
+
+while.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,62 @@
+; RUN: opt -S < %s -instcombine -inline -jump-threading -loop-unroll -unroll-count=4 | FileCheck %s
+;
+; This is a test case that required a number of setup passes because
+; it depends on block order.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.8"
+
+declare i1 @check() nounwind
+declare i32 @getval() nounwind
+
+; Check that the loop exit merges values from all the iterations. This
+; could be a tad fragile, but it's a good test.
+;
+; CHECK-LABEL: @foo(
+; CHECK: return:
+; CHECK: %retval.0 = phi i32 [ %tmp7.i, %land.lhs.true ], [ 0, %do.cond ], [ %tmp7.i.1, %land.lhs.true.1 ], [ 0, %do.cond.1 ], [ %tmp7.i.2, %land.lhs.true.2 ], [ 0, %do.cond.2 ], [ %tmp7.i.3, %land.lhs.true.3 ], [ 0, %do.cond.3 ]
+; CHECK-NOT: @bar(
+; CHECK: bar.exit.3
+define i32 @foo() uwtable ssp align 2 {  ; do-while loop around a call to @bar with two exits that both feed %retval.0
+entry:
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %call2 = call i32 @getval()  ; computed once before the loop; only read by the latch compare
+  br label %do.body
+
+do.body:                                          ; preds = %do.cond, %if.end
+  %call6 = call i32 @bar()
+  %cmp = icmp ne i32 %call6, 0
+  br i1 %cmp, label %land.lhs.true, label %do.cond
+
+land.lhs.true:                                    ; preds = %do.body
+  %call10 = call i32 @getval()
+  %cmp11 = icmp eq i32 0, %call10
+  br i1 %cmp11, label %return, label %do.cond  ; early exit, returning %call6
+
+do.cond:                                          ; preds = %land.lhs.true, %do.body
+  %cmp18 = icmp sle i32 0, %call2
+  br i1 %cmp18, label %do.body, label %return  ; latch: keep looping while 0 <= %call2
+
+return:                                           ; preds = %do.cond, %land.lhs.true, %entry
+  %retval.0 = phi i32 [ 0, %entry ], [ %call6, %land.lhs.true ], [ 0, %do.cond ]
+  ret i32 %retval.0
+}
+
+define linkonce_odr i32 @bar() nounwind uwtable ssp align 2 {  ; callee of @foo's loop body; yields @getval() on one path and 0 on the others
+entry:
+  br i1 undef, label %land.lhs.true, label %cond.end
+
+land.lhs.true:                                    ; preds = %entry
+  %cmp4 = call zeroext i1 @check()
+  br i1 %cmp4, label %cond.true, label %cond.end
+
+cond.true:                                        ; preds = %land.lhs.true
+  %tmp7 = call i32 @getval()  ; only reached when @check() returned true
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %land.lhs.true, %entry
+  %cond = phi i32 [ %tmp7, %cond.true ], [ 0, %land.lhs.true ], [ 0, %entry ]
+  ret i32 %cond
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=150 | FileCheck %s
+;
+; Verify that trunc i64 to i32 is considered free by loop unrolling
+; heuristics when i32 is a native type.
+; This should result in full unrolling this loop with size=7, TC=19.
+; If the trunc were not free we would have 8*19=152 > 150.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Check that for.body was unrolled 19 times.
+; CHECK-LABEL: @test(
+; CHECK: %0 = load
+; CHECK: %conv = sext i8 %0 to i32
+; CHECK: %add.1 = add nsw i32 %conv.1, %conv
+; CHECK: %add.18 = add nsw i32 %conv.18, %add.17
+; CHECK: ret i32 %add.18
+define i32 @test(i8* %arr) nounwind uwtable readnone {  ; sums the sign-extended bytes %arr[0..18]
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i8, i8* %arr, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = sext i8 %0 to i32
+  %add = add nsw i32 %conv, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv1 = trunc i64 %indvars.iv.next to i32  ; the i64 -> i32 trunc whose cost the file header discusses
+  %exitcond2 = icmp eq i32 %lftr.wideiv1, 19
+  br i1 %exitcond2, label %for.end, label %for.body  ; exits after the 19th iteration
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt < %s -S -loop-unroll -simplifycfg | FileCheck %s
+; PR12513: Loop unrolling breaks with indirect branches.
+; If loop unrolling attempts to transform this loop, it replaces the
+; indirectbr successors. SimplifyCFG then considers them to be unreachable.
+declare void @subtract() nounwind uwtable
+
+; CHECK-NOT: unreachable
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {  ; five-iteration loop whose body is dispatched through an indirectbr
+entry:
+  %vals19 = alloca [5 x i32], align 16
+  %x20 = alloca i32, align 4
+  store i32 135, i32* %x20, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %call2_termjoin, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %joinphi15.in.in, %call2_termjoin ]
+  %a6 = call coldcc i8* @funca(i8* blockaddress(@main, %for.body_code), i8*
+blockaddress(@main, %for.body_codeprime)) nounwind
+  indirectbr i8* %a6, [label %for.body_code, label %for.body_codeprime]  ; @funca returns which of the two block addresses to jump to
+
+for.body_code:                                    ; preds = %for.body
+  call void @subtract()
+  br label %call2_termjoin
+
+call2_termjoin:                                   ; preds = %for.body_codeprime, %for.body_code
+  %joinphi15.in.in = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %joinphi15.in.in, 5
+  br i1 %exitcond, label %for.end, label %for.body  ; latch: exit after 5 iterations
+
+for.end:                                          ; preds = %call2_termjoin
+  ret i32 0
+
+for.body_codeprime:                               ; preds = %for.body
+  call void @subtract_v2(i64 %indvars.iv)
+  br label %call2_termjoin
+}
+
+declare coldcc i8* @funca(i8*, i8*) readonly
+
+declare void @subtract_v2(i64) nounwind uwtable

Added: llvm/trunk/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/falkor-prefetch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,169 @@
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor -enable-falkor-hwpf-unroll-fix=0 | FileCheck %s --check-prefix=NOHWPF
+
+; Check that loop unroller doesn't exhaust HW prefetcher resources.
+
+; Partial unroll 2 times for this loop on falkor instead of 4.
+; NOHWPF-LABEL: @unroll1(
+; NOHWPF-LABEL: loop:
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: icmp
+; NOHWPF-NEXT: br
+; NOHWPF-NEXT-LABEL: exit:
+;
+; CHECK-LABEL: @unroll1(
+; CHECK-LABEL: loop:
+; CHECK-NEXT: phi
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br
+; CHECK-NEXT-LABEL: exit:
+define void @unroll1(i32* %p, i32* %p2) {  ; single loop, 1024 iterations, reading one element from each of %p and %p2 per iteration
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+
+  %gep = getelementptr inbounds i32, i32* %p, i32 %iv
+  %load = load volatile i32, i32* %gep  ; volatile load; result is otherwise unused
+
+  %gep2 = getelementptr inbounds i32, i32* %p2, i32 %iv
+  %load2 = load volatile i32, i32* %gep2  ; second volatile load, from the other pointer
+
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Partial unroll 4 times for this loop on falkor instead of 8.
+; NOHWPF-LABEL: @unroll2(
+; NOHWPF-LABEL: loop2:
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: phi
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: getelementptr
+; NOHWPF-NEXT: load
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: add
+; NOHWPF-NEXT: icmp
+; NOHWPF-NEXT: br
+; NOHWPF-NEXT-LABEL: exit2:
+;
+; CHECK-LABEL: @unroll2(
+; CHECK-LABEL: loop2:
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br
+; CHECK-NEXT-LABEL: exit2:
+
+define void @unroll2(i32* %p) {  ; two-level loop nest; the inner loop %loop2 accumulates %p[0..1023] into %sum
+entry:
+  br label %loop1
+
+loop1:
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+  %outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ]  ; running total carried across outer iterations
+  br label %loop2.header
+
+loop2.header:
+  br label %loop2
+
+loop2:
+  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+  %sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ]
+  %gep = getelementptr inbounds i32, i32* %p, i32 %iv2
+  %load = load i32, i32* %gep
+  %sum.inc = add i32 %sum, %load
+  %inc2 = add i32 %iv2, 1
+  %exitcnd2 = icmp uge i32 %inc2, 1024
+  br i1 %exitcnd2, label %exit2, label %loop2
+
+exit2:
+  br label %loop1.latch
+
+loop1.latch:
+  %inc1 = add i32 %iv1, 1
+  %exitcnd1 = icmp uge i32 %inc1, 1024  ; computed but not used by the branch below
+  br i1 %exitcnd2, label %exit, label %loop1  ; NOTE(review): branches on the inner loop's %exitcnd2, not %exitcnd1 - confirm this is intended
+
+exit:
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,43 @@
+; RUN: opt -loop-unroll -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=UNROLL
+; RUN: opt -loop-unroll -unroll-max-upperbound=0 -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=NOUNROLL
+
+; This IR comes from this C code:
+;
+;   for (int i = 0; i < 4; i++) {
+;     if (src[i] == 1) {
+;       *dst = i;
+;       break;
+;     }
+;   }
+;
+; This test is meant to check that this loop is unrolled into four iterations.
+
+; UNROLL-LABEL: @test
+; UNROLL: load i32, i32*
+; UNROLL: load i32, i32*
+; UNROLL: load i32, i32*
+; UNROLL: load i32, i32*
+; UNROLL-NOT: load i32, i32*
+; NOUNROLL-LABEL: @test
+; NOUNROLL: load i32, i32*
+; NOUNROLL-NOT: load i32, i32*
+
+define void @test(i32* %dst, i32* %src) {  ; loop of at most 4 iterations over %src with a data-dependent exit; stores the final %i to %dst
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = sext i32 %i to i64
+  %1 = getelementptr inbounds i32, i32* %src, i64 %0
+  %2 = load i32, i32* %1
+  %inc = add nsw i32 %i, 1
+  %cmp1 = icmp slt i32 %inc, 4  ; bounds the trip count at 4
+  %cmp3 = icmp eq i32 %2, 1  ; data-dependent part of the exit condition
+  %or.cond = and i1 %cmp3, %cmp1  ; named or.cond but computed with 'and'
+  br i1 %or.cond, label %for.body, label %exit  ; NOTE(review): continues while src[i] == 1, whereas the C sketch in the file header breaks on src[i] == 1 - confirm
+
+exit:                                          ; preds = %for.body
+  store i32 %i, i32* %dst
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/AArch64/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/LoopUnroll/AArch64/partial.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/partial.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/partial.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/partial.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,76 @@
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
+
+; Partial unroll 8 times for this loop.
+define void @unroll1() nounwind {  ; empty counting loop, 1024 iterations; body is just the induction increment
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1  ; the only 'add' per iteration; the checks after this function count its copies
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+
+; Partial unroll 16 times for this loop.
+define void @unroll2() nounwind {  ; two-level nest of empty counting loops; inner loop %loop2 runs 1024 iterations
+entry:
+  br label %loop1
+
+loop1:
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+  br label %loop2.header
+
+loop2.header:
+  br label %loop2
+
+loop2:
+  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+  %inc2 = add i32 %iv2, 1  ; the only 'add' in the inner loop body
+  %exitcnd2 = icmp uge i32 %inc2, 1024
+  br i1 %exitcnd2, label %exit2, label %loop2
+
+exit2:
+  br label %loop1.latch
+
+loop1.latch:
+  %inc1 = add i32 %iv1, 1
+  %exitcnd1 = icmp uge i32 %inc1, 1024  ; computed but not used by the branch below
+  br i1 %exitcnd2, label %exit, label %loop1  ; NOTE(review): branches on the inner loop's %exitcnd2, not %exitcnd1 - confirm this is intended
+
+exit:
+  ret void
+}
+
+
+
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp

Added: llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+; Tests for unrolling loops with run-time trip counts
+
+; EPILOG:  %xtraiter = and i32 %n
+; EPILOG:  for.body:
+; EPILOG:  %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG:  br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
+; EPILOG:  for.body.epil:
+
+; PROLOG:  %xtraiter = and i32 %n
+; PROLOG:  %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG:  br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+; PROLOG:  for.body.prol:
+; PROLOG:  for.body:
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {  ; sums %a[0..] until the truncated index equals %n; returns 0 when %n == 0
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body  ; zero-trip guard
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n  ; trip count depends on the run-time value %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+

Added: llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,33 @@
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test_unroll_convergent_barrier(
+; CHECK: call void @llvm.amdgcn.s.barrier()
+; CHECK: call void @llvm.amdgcn.s.barrier()
+; CHECK: call void @llvm.amdgcn.s.barrier()
+; CHECK: call void @llvm.amdgcn.s.barrier()
+; CHECK-NOT: br
+define amdgpu_kernel void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 {  ; 4-iteration running-sum loop with a barrier call in the body
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx.in = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %indvars.iv
+  %arrayidx.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %indvars.iv
+  %load = load i32, i32 addrspace(1)* %arrayidx.in
+  call void @llvm.amdgcn.s.barrier() #1  ; attribute group #1 marks this call convergent
+  %add = add i32 %load, %sum.02
+  store i32 %add, i32 addrspace(1)* %arrayidx.out  ; out[i] = in[0] + ... + in[i]
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body  ; constant trip count of 4
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare void @llvm.amdgcn.s.barrier() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind convergent }

Added: llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-call.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,77 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -unroll-threshold=100 -unroll-peel-count=0 -unroll-allow-partial=false -unroll-max-iteration-count-to-analyze=16 < %s | FileCheck %s
+
+; CHECK-LABEL: @test_intrinsic_call_cost(
+; CHECK-NOT: br i1
+define amdgpu_kernel void @test_intrinsic_call_cost(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture %in) #0 {  ; 16-iteration loop whose body calls the llvm.minnum.f32 intrinsic
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
+  %arrayidx.in = getelementptr inbounds float, float addrspace(1)* %in, i32 %indvars.iv
+  %arrayidx.out = getelementptr inbounds float, float addrspace(1)* %out, i32 %indvars.iv
+  %load = load float, float addrspace(1)* %arrayidx.in
+  %call = call float @llvm.minnum.f32(float %load, float 1.0); the call under test: an intrinsic, not a real function call
+  %fmul = fmul float %call, %sum.02
+  store float %fmul, float addrspace(1)* %arrayidx.out
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.end, label %for.body  ; constant trip count of 16
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @test_func_call_cost(
+; CHECK: br i1 %exitcond
+define amdgpu_kernel void @test_func_call_cost(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture %in) #0 {  ; 16-iteration loop whose body calls through a loaded function pointer
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
+  %arrayidx.in = getelementptr inbounds float, float addrspace(1)* %in, i32 %indvars.iv
+  %arrayidx.out = getelementptr inbounds float, float addrspace(1)* %out, i32 %indvars.iv
+  %load = load float, float addrspace(1)* %arrayidx.in
+  %fptr = load float(float, float)*, float(float, float )* addrspace(4)* null  ; callee pointer loaded from a null addrspace(4) address
+  %call = tail call float %fptr(float %load, float 1.0)  ; NOTE(review): this is an indirect call although the function is named test_func_call_cost - names may be swapped with test_indirect_call_cost
+  %fmul = fmul float %call, %sum.02
+  store float %fmul, float addrspace(1)* %arrayidx.out
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.end, label %for.body  ; constant trip count of 16
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @test_indirect_call_cost(
+; CHECK: br i1 %exitcond
+define amdgpu_kernel void @test_indirect_call_cost(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture %in) #0 {  ; 16-iteration loop whose body calls the declared function @func
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
+  %arrayidx.in = getelementptr inbounds float, float addrspace(1)* %in, i32 %indvars.iv
+  %arrayidx.out = getelementptr inbounds float, float addrspace(1)* %out, i32 %indvars.iv
+  %load = load float, float addrspace(1)* %arrayidx.in
+  %min = call float @func(float %load, float 1.0); NOTE(review): this is a direct call although the function is named test_indirect_call_cost - names may be swapped with test_func_call_cost
+  %fmul = fmul float %min, %sum.02
+  store float %fmul, float addrspace(1)* %arrayidx.out
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.end, label %for.body  ; constant trip count of 16
+
+for.end:
+  ret void
+}
+
+declare float @llvm.minnum.f32(float, float) #1
+declare float @func(float, float) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }

Added: llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,154 @@
+; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
+
+; Check that we fully unroll the loop so that the alloca can be eliminated
+; CHECK-LABEL: @non_invariant_ind
+; CHECK:       for.body:
+; CHECK-NOT:   br
+; CHECK:       store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
+; CHECK:       ret void
+
+define amdgpu_kernel void @non_invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
+entry:
+  %arr = alloca [64 x i32], align 4, addrspace(5) ; fixed-size array of 64 x i32 in addrspace(5)
+  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  %arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+  %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4 ; read element %x of the alloca after the loop
+  %arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
+  store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %idxprom = sext i32 %i.015 to i64
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
+  %tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %add = add nsw i32 %i.015, %tmp1
+  %rem = srem i32 %add, 64 ; store index depends on the loop counter %i.015
+  %arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+  store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
+  %inc = add nuw nsw i32 %i.015, 1
+  %exitcond = icmp eq i32 %inc, 100 ; constant trip count of 100
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Check that we unroll the inner loop but not the outer one
+; CHECK-LABEL: @invariant_ind
+; CHECK:       %[[exitcond:[^ ]+]] = icmp eq i32 %{{.*}}, 32
+; CHECK:       br i1 %[[exitcond]]
+; CHECK-NOT:   icmp eq i32 %{{.*}}, 100
+
+define amdgpu_kernel void @invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
+entry:
+  %arr = alloca [64 x i32], align 4, addrspace(5)
+  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+  br label %for.cond2.preheader
+
+for.cond2.preheader:                              ; preds = %for.cond.cleanup5, %entry
+  %i.026 = phi i32 [ 0, %entry ], [ %inc10, %for.cond.cleanup5 ]
+  %idxprom = sext i32 %i.026 to i64
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
+  %tmp15 = load i32, i32 addrspace(1)* %arrayidx, align 4 ; loaded once per outer iteration, stored by the inner loop
+  br label %for.body6
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup5
+  %arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+  %tmp16 = load i32, i32 addrspace(5)* %arrayidx13, align 4
+  %arrayidx15 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
+  store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
+  ret void
+
+for.cond.cleanup5:                                ; preds = %for.body6
+  %inc10 = add nuw nsw i32 %i.026, 1
+  %exitcond27 = icmp eq i32 %inc10, 32 ; outer-loop exit test (32 iterations); the checks expect a compare against 32 to remain
+  br i1 %exitcond27, label %for.cond.cleanup, label %for.cond2.preheader
+
+for.body6:                                        ; preds = %for.body6, %for.cond2.preheader
+  %j.025 = phi i32 [ 0, %for.cond2.preheader ], [ %inc, %for.body6 ]
+  %add = add nsw i32 %j.025, %tmp1
+  %rem = srem i32 %add, 64 ; index depends only on the inner counter %j.025 and %tmp1
+  %arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+  store i32 %tmp15, i32 addrspace(5)* %arrayidx8, align 4
+  %inc = add nuw nsw i32 %j.025, 1
+  %exitcond = icmp eq i32 %inc, 100 ; inner-loop exit test (100 iterations); the checks expect no compare against 100 to remain
+  br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
+}
+
+; Check that we do not force unrolling if the alloca is too big
+; CHECK-LABEL: @too_big
+; CHECK:       for.body:
+; CHECK:       icmp eq i32 %{{.*}}, 100
+; CHECK:       br
+
+define amdgpu_kernel void @too_big(i32 addrspace(1)* nocapture %a, i32 %x) {
+entry:
+  %arr = alloca [256 x i32], align 4, addrspace(5) ; 256 x i32 here, versus 64 x i32 in @non_invariant_ind
+  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  %arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %x
+  %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
+  %arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
+  store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %idxprom = sext i32 %i.015 to i64
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
+  %tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %add = add nsw i32 %i.015, %tmp1
+  %rem = srem i32 %add, 64
+  %arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+  store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
+  %inc = add nuw nsw i32 %i.015, 1
+  %exitcond = icmp eq i32 %inc, 100 ; the checks expect this compare against 100 to remain
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Check that we do not force unrolling if the alloca has a dynamic size
+; CHECK-LABEL: @dynamic_size_alloca(
+; CHECK: alloca i32, i32 %n
+; CHECK:       for.body:
+; CHECK:       icmp eq i32 %{{.*}}, 100
+; CHECK:       br
+
+define amdgpu_kernel void @dynamic_size_alloca(i32 addrspace(1)* nocapture %a, i32 %n, i32 %x) {
+entry:
+  %arr = alloca i32, i32 %n, align 4, addrspace(5) ; element count comes from the kernel argument %n
+  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  %arrayidx5 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %x
+  %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
+  %arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
+  store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %idxprom = sext i32 %i.015 to i64
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom
+  %tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %add = add nsw i32 %i.015, %tmp1
+  %rem = srem i32 %add, 64
+  %arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %rem
+  store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
+  %inc = add nuw nsw i32 %i.015, 1
+  %exitcond = icmp eq i32 %inc, 100 ; the checks expect this compare against 100 to remain
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+declare i32 @llvm.amdgcn.workgroup.id.x() #1
+
+declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
+
+attributes #1 = { nounwind readnone }

Added: llvm/trunk/test/Transforms/LoopUnroll/ARM/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/ARM/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/ARM/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/ARM/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,247 @@
+; RUN: opt -mtriple=armv7 -mcpu=cortex-a57 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-A
+; RUN: opt -mtriple=thumbv7 -mcpu=cortex-a57 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-A
+; RUN: opt -mtriple=thumbv7 -mcpu=cortex-a72 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-A
+; RUN: opt -mtriple=thumbv8m -mcpu=cortex-m23 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-T1
+; RUN: opt -mtriple=thumbv8m.main -mcpu=cortex-m33 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-T2
+; RUN: opt -mtriple=thumbv7em -mcpu=cortex-m7 -loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL-T2
+
+; CHECK-LABEL: partial
+define arm_aapcs_vfpcc void @partial(i32* nocapture %C, i32* nocapture readonly %A, i32* nocapture readonly %B) local_unnamed_addr #0 {
+entry: ; NOTE(review): no RUN line of this file enables the plain CHECK prefix, so the LABEL lines look inactive - confirm
+  br label %for.body
+
+; CHECK-LABEL: for.body
+for.body: ; expected induction increments per iteration: 2 for the A prefix, 1 for T1, 16 for T2
+
+; CHECK-UNROLL-A: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
+; CHECK-UNROLL-A: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
+; CHECK-UNROLL-A: [[IV2]] = add nuw nsw i32 [[IV1]], 1
+; CHECK-UNROLL-A: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV2]], 1024
+; CHECK-UNROLL-A: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body
+
+; CHECK-UNROLL-T1: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
+; CHECK-UNROLL-T1: [[IV1]] = add nuw nsw i32 [[IV0]], 1
+; CHECK-UNROLL-T1: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV1]], 1024
+; CHECK-UNROLL-T1: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body
+
+; CHECK-UNROLL-T2: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV16:%[a-z.0-9]+]], %for.body ]
+; CHECK-UNROLL-T2: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
+; CHECK-UNROLL-T2: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
+; CHECK-UNROLL-T2: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
+; CHECK-UNROLL-T2: [[IV4:%[a-z.0-9]+]] = add nuw nsw i32 [[IV3]], 1
+; CHECK-UNROLL-T2: [[IV5:%[a-z.0-9]+]] = add nuw nsw i32 [[IV4]], 1
+; CHECK-UNROLL-T2: [[IV6:%[a-z.0-9]+]] = add nuw nsw i32 [[IV5]], 1
+; CHECK-UNROLL-T2: [[IV7:%[a-z.0-9]+]] = add nuw nsw i32 [[IV6]], 1
+; CHECK-UNROLL-T2: [[IV8:%[a-z.0-9]+]] = add nuw nsw i32 [[IV7]], 1
+; CHECK-UNROLL-T2: [[IV9:%[a-z.0-9]+]] = add nuw nsw i32 [[IV8]], 1
+; CHECK-UNROLL-T2: [[IV10:%[a-z.0-9]+]] = add nuw nsw i32 [[IV9]], 1
+; CHECK-UNROLL-T2: [[IV11:%[a-z.0-9]+]] = add nuw nsw i32 [[IV10]], 1
+; CHECK-UNROLL-T2: [[IV12:%[a-z.0-9]+]] = add nuw nsw i32 [[IV11]], 1
+; CHECK-UNROLL-T2: [[IV13:%[a-z.0-9]+]] = add nuw nsw i32 [[IV12]], 1
+; CHECK-UNROLL-T2: [[IV14:%[a-z.0-9]+]] = add nuw nsw i32 [[IV13]], 1
+; CHECK-UNROLL-T2: [[IV15:%[a-z.0-9]+]] = add nuw nsw i32 [[IV14]], 1
+; CHECK-UNROLL-T2: [[IV16]] = add nuw nsw i32 [[IV15]], 1
+; CHECK-UNROLL-T2: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV16]], 1024
+; CHECK-UNROLL-T2: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body
+
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.08
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.08
+  %1 = load i32, i32* %arrayidx1, align 4
+  %mul = mul nsw i32 %1, %0 ; C[i] = B[i] * A[i]
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.08
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %inc, 1024 ; constant trip count of 1024
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+; CHECK-LABEL: runtime
+define arm_aapcs_vfpcc void @runtime(i32* nocapture %C, i32* nocapture readonly %A, i32* nocapture readonly %B, i32 %N) local_unnamed_addr #0 {
+entry:
+  %cmp8 = icmp eq i32 %N, 0 ; skip the loop entirely when %N is zero
+  br i1 %cmp8, label %for.cond.cleanup, label %for.body
+
+; CHECK-LABEL: for.body
+for.body: ; expected increments per iteration: 2 for the A prefix, 1 for T1, 4 for T2 (T2 also expects three .epil blocks)
+; CHECK-UNROLL-A: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
+; CHECK-UNROLL-A: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
+; CHECK-UNROLL-A: [[IV2]] = add nuw i32 [[IV1]], 1
+; CHECK-UNROLL-A: br
+
+; CHECK-UNROLL-T1: %i.09 = phi i32 [ %inc, %for.body ], [ 0
+; CHECK-UNROLL-T1: %inc = add nuw i32 %i.09, 1
+; CHECK-UNROLL-T1: %exitcond = icmp eq i32 %inc, %N
+; CHECK-UNROLL-T1: br
+
+; CHECK-UNROLL-T2: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body ]
+; CHECK-UNROLL-T2: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
+; CHECK-UNROLL-T2: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
+; CHECK-UNROLL-T2: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
+; CHECK-UNROLL-T2: [[IV4]] = add nuw i32 [[IV3]], 1
+; CHECK-UNROLL-T2: br
+
+; CHECK-UNROLL-T2: for.body.epil:
+; CHECK-UNROLL-T2: for.body.epil.1:
+; CHECK-UNROLL-T2: for.body.epil.2:
+
+  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.09
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.09
+  %1 = load i32, i32* %arrayidx1, align 4
+  %mul = mul nsw i32 %1, %0 ; C[i] = B[i] * A[i]
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.09
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nuw i32 %i.09, 1
+  %exitcond = icmp eq i32 %inc, %N ; trip count is the function argument %N
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+; CHECK-LABEL: nested_runtime
+define arm_aapcs_vfpcc void @nested_runtime(i32* nocapture %C, i16* nocapture readonly %A, i16* nocapture readonly %B, i32 %N) local_unnamed_addr #0 {
+entry:
+  %cmp25 = icmp eq i32 %N, 0 ; skip both loops when %N is zero
+  br i1 %cmp25, label %for.cond.cleanup, label %for.body4.lr.ph
+
+for.body4.lr.ph: ; outer-loop header
+  %h.026 = phi i32 [ %inc11, %for.cond.cleanup3 ], [ 0, %entry ]
+  %mul = mul i32 %h.026, %N ; offset into %A for this outer iteration (h * N)
+  br label %for.body4
+
+for.cond.cleanup:
+  ret void
+
+for.cond.cleanup3: ; outer-loop latch
+  %inc11 = add nuw i32 %h.026, 1
+  %exitcond27 = icmp eq i32 %inc11, %N
+  br i1 %exitcond27, label %for.cond.cleanup, label %for.body4.lr.ph
+
+; CHECK-LABEL: for.body4
+for.body4: ; inner loop; expected increments per iteration: 1 for T1, 4 for T2 (T2 also expects .epil blocks); the A prefix has no checks here
+; CHECK-UNROLL-T1: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV1:%[a-z.0-9]+]], %for.body4 ]
+; CHECK-UNROLL-T1: [[IV1]] = add nuw i32 [[IV0]], 1
+; CHECK-UNROLL-T1: br
+
+; CHECK-UNROLL-T2: for.body4.epil:
+; CHECK-UNROLL-T2: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body4 ]
+; CHECK-UNROLL-T2: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
+; CHECK-UNROLL-T2: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
+; CHECK-UNROLL-T2: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
+; CHECK-UNROLL-T2: [[IV4]] = add nuw i32 [[IV3]], 1
+; CHECK-UNROLL-T2: br
+; CHECK-UNROLL-T2: for.body4.epil.1:
+; CHECK-UNROLL-T2: for.body4.epil.2:
+
+  %w.024 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
+  %add = add i32 %w.024, %mul
+  %arrayidx = getelementptr inbounds i16, i16* %A, i32 %add
+  %0 = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %0 to i32
+  %arrayidx5 = getelementptr inbounds i16, i16* %B, i32 %w.024
+  %1 = load i16, i16* %arrayidx5, align 2
+  %conv6 = sext i16 %1 to i32
+  %mul7 = mul nsw i32 %conv6, %conv
+  %arrayidx8 = getelementptr inbounds i32, i32* %C, i32 %w.024
+  %2 = load i32, i32* %arrayidx8, align 4
+  %add9 = add nsw i32 %mul7, %2 ; C[w] += B[w] * A[h*N + w]
+  store i32 %add9, i32* %arrayidx8, align 4
+  %inc = add nuw i32 %w.024, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup3, label %for.body4
+}
+
+; CHECK-LABEL: loop_call
+define arm_aapcs_vfpcc void @loop_call(i32* nocapture %C, i32* nocapture readonly %A, i32* nocapture readonly %B) local_unnamed_addr #1 {
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+; CHECK-LABEL: for.body
+for.body: ; all three prefixes expect a single induction increment per iteration
+; CHECK-UNROLL-A: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
+; CHECK-UNROLL-A: [[IV1]] = add nuw nsw i32 [[IV0]], 1
+; CHECK-UNROLL-A: icmp eq i32 [[IV1]], 1024
+; CHECK-UNROLL-A: br
+
+; CHECK-UNROLL-T1: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
+; CHECK-UNROLL-T1: [[IV1]] = add nuw nsw i32 [[IV0]], 1
+; CHECK-UNROLL-T1: icmp eq i32 [[IV1]], 1024
+; CHECK-UNROLL-T1: br
+
+; CHECK-UNROLL-T2: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
+; CHECK-UNROLL-T2: [[IV1]] = add nuw nsw i32 [[IV0]], 1
+; CHECK-UNROLL-T2: icmp eq i32 [[IV1]], 1024
+; CHECK-UNROLL-T2: br
+
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.08
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.08
+  %1 = load i32, i32* %arrayidx1, align 4
+  %call = tail call arm_aapcs_vfpcc i32 @some_func(i32 %0, i32 %1) #3 ; the loop body contains a call to an external function
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.08
+  store i32 %call, i32* %arrayidx2, align 4
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %inc, 1024 ; constant trip count of 1024
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: iterate_inc
+; CHECK-UNROLL-A: %n.addr.04 = phi %struct.Node* [ %1, %while.body ], [ %n, %while.body.preheader ]
+; CHECK-UNROLL-A: %tobool = icmp eq %struct.Node* %1, null
+; CHECK-UNROLL-A: br i1 %tobool
+; CHECK-UNROLL-A-NOT: load
+
+; CHECK-UNROLL-T1: %n.addr.04 = phi %struct.Node* [ %1, %while.body ], [ %n, %while.body.preheader ]
+; CHECK-UNROLL-T1: %tobool = icmp eq %struct.Node* %1, null
+; CHECK-UNROLL-T1: br i1 %tobool
+; CHECK-UNROLL-T1-NOT: load
+
+; CHECK-UNROLL-T2: [[CMP0:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR0:%[a-z.0-9]+]], null
+; CHECK-UNROLL-T2: br i1 [[CMP0]], label [[END:%[a-z.0-9]+]]
+; CHECK-UNROLL-T2: [[CMP1:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR1:%[a-z.0-9]+]], null
+; CHECK-UNROLL-T2: br i1 [[CMP1]], label [[END]]
+; CHECK-UNROLL-T2: [[CMP2:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR2:%[a-z.0-9]+]], null
+; CHECK-UNROLL-T2: br i1 [[CMP2]], label [[END]]
+; CHECK-UNROLL-T2: [[CMP3:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR3:%[a-z.0-9]+]], null
+; CHECK-UNROLL-T2: br i1 [[CMP3]], label [[END]]
+; CHECK-UNROLL-T2: [[CMP4:%[a-z.0-9]+]] = icmp eq %struct.Node* [[VAR4:%[a-z.0-9]+]], null
+; CHECK-UNROLL-T2: br i1 [[CMP4]], label [[END]]
+; CHECK-UNROLL-T2-NOT: load
+
+%struct.Node = type { %struct.Node*, i32 } ; field 0: pointer to another Node, field 1: i32 value
+
+define arm_aapcscc void @iterate_inc(%struct.Node* %n) local_unnamed_addr #0 {
+entry:
+  %tobool3 = icmp eq %struct.Node* %n, null ; nothing to do for a null list
+  br i1 %tobool3, label %while.end, label %while.body.preheader
+
+while.body.preheader:
+  br label %while.body
+
+while.body: ; walks the list until a null field-0 pointer; the trip count is not computable from the IR
+  %n.addr.04 = phi %struct.Node* [ %1, %while.body ], [ %n, %while.body.preheader ]
+  %val = getelementptr inbounds %struct.Node, %struct.Node* %n.addr.04, i32 0, i32 1
+  %0 = load i32, i32* %val, align 4
+  %add = add nsw i32 %0, 1 ; increment field 1 of the current node
+  store i32 %add, i32* %val, align 4
+  %next = getelementptr inbounds %struct.Node, %struct.Node* %n.addr.04, i32 0, i32 0
+  %1 = load %struct.Node*, %struct.Node** %next, align 4 ; advance to the node stored in field 0
+  %tobool = icmp eq %struct.Node* %1, null
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
+
+declare arm_aapcs_vfpcc i32 @some_func(i32, i32) local_unnamed_addr #2

Added: llvm/trunk/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/ARM/multi-blocks.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/ARM/multi-blocks.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/ARM/multi-blocks.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,316 @@
+; RUN: opt -mtriple=thumbv8m.main -mcpu=cortex-m33 -loop-unroll -S < %s -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv7em -mcpu=cortex-m7 -loop-unroll -S < %s -o - | FileCheck %s
+
+;CHECK-LABEL: test_three_blocks
+;CHECK: for.body.epil:
+;CHECK: if.then.epil:
+;CHECK: for.inc.epil:
+;CHECK: for.body:
+;CHECK: if.then:
+;CHECK: for.inc:
+;CHECK: for.body.epil.1:
+;CHECK: if.then.epil.1:
+;CHECK: for.inc.epil.1:
+;CHECK: for.body.epil.2:
+;CHECK: if.then.epil.2:
+;CHECK: for.inc.epil.2:
+;CHECK: if.then.1:
+;CHECK: for.inc.1:
+;CHECK: if.then.2:
+;CHECK: for.inc.2:
+;CHECK: if.then.3:
+;CHECK: for.inc.3:
+define void @test_three_blocks(i32* nocapture %Output,
+                               i32* nocapture readonly %Condition,
+                               i32* nocapture readonly %Input,
+                               i32 %MaxJ) {
+entry:
+  %cmp8 = icmp eq i32 %MaxJ, 0 ; skip the loop when %MaxJ is zero
+  br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc, %entry
+  %temp.0.lcssa = phi i32 [ 0, %entry ], [ %temp.1, %for.inc ]
+  store i32 %temp.0.lcssa, i32* %Output, align 4
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %j.010 = phi i32 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
+  %temp.09 = phi i32 [ %temp.1, %for.inc ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %Condition, i32 %j.010
+  %0 = load i32, i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %for.inc, label %if.then ; only accumulate when Condition[j] is non-zero
+
+if.then:                                          ; preds = %for.body
+  %arrayidx1 = getelementptr inbounds i32, i32* %Input, i32 %j.010
+  %1 = load i32, i32* %arrayidx1, align 4
+  %add = add i32 %1, %temp.09
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %temp.1 = phi i32 [ %add, %if.then ], [ %temp.09, %for.body ]
+  %inc = add nuw i32 %j.010, 1
+  %exitcond = icmp eq i32 %inc, %MaxJ ; single loop exit; trip count is the argument %MaxJ
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+;CHECK-LABEL: test_two_exits
+;CHECK: for.body:
+;CHECK: if.end:
+;CHECK: cleanup.loopexit:
+;CHECK: cleanup:
+;CHECK: for.body.1:
+;CHECK: if.end.1:
+;CHECK: for.body.2:
+;CHECK: if.end.2:
+;CHECK: for.body.3:
+;CHECK: if.end.3:
+define void @test_two_exits(i32* nocapture %Output,
+                            i32* nocapture readonly %Condition,
+                            i32* nocapture readonly %Input,
+                            i32 %MaxJ) {
+entry:
+  %cmp14 = icmp eq i32 %MaxJ, 0 ; skip the loop when %MaxJ is zero
+  br i1 %cmp14, label %cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %if.end
+  %j.016 = phi i32 [ %inc, %if.end ], [ 0, %for.body.preheader ]
+  %temp.015 = phi i32 [ %temp.0.add, %if.end ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %Input, i32 %j.016
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp ugt i32 %0, 65535
+  br i1 %cmp1, label %cleanup, label %if.end ; first loop exit: Input[j] > 65535
+
+if.end:                                           ; preds = %for.body
+  %arrayidx2 = getelementptr inbounds i32, i32* %Condition, i32 %j.016
+  %1 = load i32, i32* %arrayidx2, align 4
+  %tobool = icmp eq i32 %1, 0
+  %add = select i1 %tobool, i32 0, i32 %0 ; add Input[j] only when Condition[j] is non-zero
+  %temp.0.add = add i32 %add, %temp.015
+  %inc = add nuw i32 %j.016, 1
+  %cmp = icmp ult i32 %inc, %MaxJ
+  br i1 %cmp, label %for.body, label %cleanup ; second loop exit: counter reached %MaxJ
+
+cleanup:                                          ; preds = %if.end, %for.body, %entry
+  %temp.0.lcssa = phi i32 [ 0, %entry ], [ %temp.015, %for.body ], [ %temp.0.add, %if.end ]
+  store i32 %temp.0.lcssa, i32* %Output, align 4
+  ret void
+}
+
+;CHECK-LABEL: test_three_exits
+;CHECK-NOT: for.body.epil
+;CHECK-NOT: if.end.epil
+;CHECK-LABEL: for.body
+;CHECK-LABEL: if.end
+;CHECK-LABEL: if.end5
+define void @test_three_exits(i32* nocapture %Output,
+                              i32* nocapture readonly %Condition,
+                              i32* nocapture readonly %Input,
+                              i32 %MaxJ) {
+entry:
+  %cmp20 = icmp eq i32 %MaxJ, 0 ; skip the loop when %MaxJ is zero
+  br i1 %cmp20, label %cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %if.end5
+  %j.022 = phi i32 [ %inc, %if.end5 ], [ 0, %for.body.preheader ]
+  %temp.021 = phi i32 [ %temp.0.add, %if.end5 ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %Condition, i32 %j.022
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp ugt i32 %0, 65535
+  br i1 %cmp1, label %cleanup, label %if.end ; first loop exit: Condition[j] > 65535
+
+if.end:                                           ; preds = %for.body
+  %arrayidx2 = getelementptr inbounds i32, i32* %Input, i32 %j.022
+  %1 = load i32, i32* %arrayidx2, align 4
+  %cmp3 = icmp ugt i32 %1, 65535
+  br i1 %cmp3, label %cleanup, label %if.end5 ; second loop exit: Input[j] > 65535
+
+if.end5:                                          ; preds = %if.end
+  %tobool = icmp eq i32 %0, 0
+  %add = select i1 %tobool, i32 0, i32 %1 ; add Input[j] only when Condition[j] is non-zero
+  %temp.0.add = add i32 %add, %temp.021
+  %inc = add nuw i32 %j.022, 1
+  %cmp = icmp ult i32 %inc, %MaxJ
+  br i1 %cmp, label %for.body, label %cleanup ; third loop exit: counter reached %MaxJ
+
+cleanup:                                          ; preds = %if.end5, %for.body, %if.end, %entry
+  %temp.0.lcssa = phi i32 [ 0, %entry ], [ %temp.021, %if.end ], [ %temp.021, %for.body ], [ %temp.0.add, %if.end5 ]
+  store i32 %temp.0.lcssa, i32* %Output, align 4
+  ret void
+}
+
+;CHECK-LABEL: test_four_blocks
+;CHECK: for.body.epil:
+;CHECK: if.else.epil:
+;CHECK: if.then.epil:
+;CHECK: for.cond.cleanup:
+;CHECK: for.body:
+;CHECK: if.then:
+;CHECK: for.inc:
+;CHECK: for.body.epil.1:
+;CHECK: if.else.epil.1:
+;CHECK: if.then.epil.1:
+;CHECK: for.inc.epil.1:
+;CHECK: for.body.epil.2:
+;CHECK: if.else.epil.2:
+;CHECK: if.then.epil.2:
+;CHECK: for.inc.epil.2:
+;CHECK: if.else.1:
+;CHECK: if.then.1:
+;CHECK: for.inc.1:
+;CHECK: if.else.2:
+;CHECK: if.then.2:
+;CHECK: for.inc.2:
+;CHECK: if.else.3:
+;CHECK: if.then.3:
+;CHECK: for.inc.3:
+define void @test_four_blocks(i32* nocapture %Output,
+                              i32* nocapture readonly %Condition,
+                              i32* nocapture readonly %Input,
+                              i32 %MaxJ) {
+entry:
+  %cmp25 = icmp ugt i32 %MaxJ, 1 ; the loop starts at j = 1, so it only runs when %MaxJ > 1
+  br i1 %cmp25, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:                                   ; preds = %entry
+  %.pre = load i32, i32* %Input, align 4 ; Input[0], used as the "previous" value in the first iteration
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc, %entry
+  %temp.0.lcssa = phi i32 [ 0, %entry ], [ %temp.1, %for.inc ]
+  store i32 %temp.0.lcssa, i32* %Output, align 4
+  ret void
+
+for.body:                                         ; preds = %for.inc, %for.body.lr.ph
+  %0 = phi i32 [ %.pre, %for.body.lr.ph ], [ %2, %for.inc ]
+  %j.027 = phi i32 [ 1, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %temp.026 = phi i32 [ 0, %for.body.lr.ph ], [ %temp.1, %for.inc ]
+  %arrayidx = getelementptr inbounds i32, i32* %Condition, i32 %j.027
+  %1 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp ugt i32 %1, 65535
+  %arrayidx2 = getelementptr inbounds i32, i32* %Input, i32 %j.027
+  %2 = load i32, i32* %arrayidx2, align 4
+  %cmp4 = icmp ugt i32 %2, %0 ; Input[j] > value loaded in the previous iteration
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:                                          ; preds = %for.body
+  %cond = zext i1 %cmp4 to i32
+  %add = add i32 %temp.026, %cond
+  br label %for.inc
+
+if.else:                                          ; preds = %for.body
+  %not.cmp4 = xor i1 %cmp4, true
+  %sub = sext i1 %not.cmp4 to i32 ; -1 when %cmp4 is false, 0 otherwise
+  %sub10.sink = add i32 %j.027, %sub
+  %arrayidx11 = getelementptr inbounds i32, i32* %Input, i32 %sub10.sink
+  %3 = load i32, i32* %arrayidx11, align 4
+  %sub13 = sub i32 %temp.026, %3
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %if.else
+  %temp.1 = phi i32 [ %add, %if.then ], [ %sub13, %if.else ]
+  %inc = add nuw i32 %j.027, 1
+  %exitcond = icmp eq i32 %inc, %MaxJ ; single loop exit
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+;CHECK-LABEL: test_five_blocks
+;CHECK-NOT: for.body.epil:
+;CHECK: for.body:
+;CHECK: if.end:
+;CHECK: if.else:
+;CHECK: for.inc:
+;CHECK-NOT: for.inc.1:
+define void @test_five_blocks(i32* nocapture %Output,
+                              i32* nocapture readonly %Condition,
+                              i32* nocapture readonly %Input,
+                              i32 %MaxJ) {
+entry:
+  %cmp24 = icmp ugt i32 %MaxJ, 1 ; the loop starts at j = 1, so it only runs when %MaxJ > 1
+  br i1 %cmp24, label %for.body.preheader, label %cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %j.026 = phi i32 [ %inc, %for.inc ], [ 1, %for.body.preheader ]
+  %temp.025 = phi i32 [ %temp.1, %for.inc ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %Input, i32 %j.026
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %0, %temp.025
+  %cmp1 = icmp ugt i32 %add, 16777215
+  br i1 %cmp1, label %cleanup, label %if.end ; early loop exit when the running sum exceeds 16777215
+
+if.end:                                           ; preds = %for.body
+  %arrayidx2 = getelementptr inbounds i32, i32* %Condition, i32 %j.026
+  %1 = load i32, i32* %arrayidx2, align 4
+  %cmp3 = icmp ugt i32 %1, 65535
+  br i1 %cmp3, label %if.then4, label %if.else
+
+if.then4:                                         ; preds = %if.end
+  %sub = add i32 %j.026, -1
+  %arrayidx6 = getelementptr inbounds i32, i32* %Input, i32 %sub
+  %2 = load i32, i32* %arrayidx6, align 4 ; Input[j - 1]
+  %cmp7 = icmp ugt i32 %0, %2
+  %cond = zext i1 %cmp7 to i32
+  %add8 = add i32 %add, %cond
+  br label %for.inc
+
+if.else:                                          ; preds = %if.end
+  %and = and i32 %add, %0
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then4, %if.else
+  %temp.1 = phi i32 [ %add8, %if.then4 ], [ %and, %if.else ]
+  %inc = add nuw i32 %j.026, 1
+  %cmp = icmp ult i32 %inc, %MaxJ
+  br i1 %cmp, label %for.body, label %cleanup ; latch exit: counter reached %MaxJ
+
+cleanup:                                          ; preds = %for.inc, %for.body, %entry
+  %temp.2 = phi i32 [ 0, %entry ], [ %add, %for.body ], [ %temp.1, %for.inc ]
+  store i32 %temp.2, i32* %Output, align 4
+  ret void
+}
+
+;CHECK-LABEL: iterate_inc
+;CHECK: while.body:
+;CHECK: while.end:
+;CHECK: while.body.1:
+;CHECK: while.body.2:
+;CHECK: while.body.3:
+%struct.Node = type { %struct.Node*, i32 } ; field 0: pointer to another Node, field 1: i32 value
+define void @iterate_inc(%struct.Node* %n, i32 %limit) {
+entry:
+  %tobool5 = icmp eq %struct.Node* %n, null ; nothing to do for a null list
+  br i1 %tobool5, label %while.end, label %land.rhs.preheader
+
+land.rhs.preheader:                               ; preds = %entry
+  br label %land.rhs
+
+land.rhs:                                         ; preds = %land.rhs.preheader, %while.body
+  %list.addr.06 = phi %struct.Node* [ %2, %while.body ], [ %n, %land.rhs.preheader ]
+  %val = getelementptr inbounds %struct.Node, %struct.Node* %list.addr.06, i32 0, i32 1
+  %0 = load i32, i32* %val, align 4
+  %cmp = icmp slt i32 %0, %limit
+  br i1 %cmp, label %while.body, label %while.end ; first loop exit: field 1 is not below %limit
+
+while.body:                                       ; preds = %land.rhs
+  %inc = add nsw i32 %0, 1 ; increment field 1 of the current node
+  store i32 %inc, i32* %val, align 4
+  %1 = bitcast %struct.Node* %list.addr.06 to %struct.Node** ; address of the node itself, read as a Node* slot
+  %2 = load %struct.Node*, %struct.Node** %1, align 4
+  %tobool = icmp eq %struct.Node* %2, null
+  br i1 %tobool, label %while.end, label %land.rhs ; second loop exit: the loaded pointer is null
+
+while.end:                                        ; preds = %land.rhs, %while.body, %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/Hexagon/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/Hexagon/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/Hexagon/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/Hexagon/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'Hexagon' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt -loop-unroll -mtriple=hexagon -S < %s | FileCheck %s
+; Check that the loop is peeled twice for Hexagon.
+; CHECK: while.body.peel
+; CHECK: while.body.peel2
+
+%struct.STREAM = type { %union.anon, i32, i32 }
+%union.anon = type { i32* }
+
+define void @function(%struct.STREAM* nocapture readonly %b) local_unnamed_addr {
+entry:
+  %bitPtr3 = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 2
+  %0 = load i32, i32* %bitPtr3, align 4
+  %cmp11 = icmp ult i32 %0, 32
+  br i1 %cmp11, label %while.body.preheader, label %do.end
+
+while.body.preheader:
+  %value2 = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 1
+  %1 = load i32, i32* %value2, align 4
+  %w = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 0, i32 0
+  %2 = load i32*, i32** %w, align 4
+  br label %while.body
+
+while.body:
+  %bitPtr.014 = phi i32 [ %add, %while.body ], [ %0, %while.body.preheader ]
+  %value.013 = phi i32 [ %shl, %while.body ], [ %1, %while.body.preheader ]
+  %ptr.012 = phi i32* [ %incdec.ptr, %while.body ], [ %2, %while.body.preheader ]
+  %add = add nuw i32 %bitPtr.014, 8
+  %shr = lshr i32 %value.013, 24
+  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.012, i32 1
+  store i32 %shr, i32* %ptr.012, align 4
+  %shl = shl i32 %value.013, 8
+  %cmp = icmp ult i32 %add, 17
+  br i1 %cmp, label %while.body, label %do.end
+
+do.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;; Check that we do emit expensive instructions to compute trip
+;; counts when unrolling loops on the a2 (because we unroll a lot).
+
+define i32 @test(i64 %v12, i8* %array, i64* %loc) {
+; CHECK-LABEL: @test(
+; CHECK: udiv
+entry:
+  %step = load i64, i64* %loc, !range !0
+  br label %loop
+
+loop:                                           ; preds = %entry, %loop
+  %k.015 = phi i64 [ %v15, %loop ], [ %v12, %entry ]
+  %v14 = getelementptr inbounds i8, i8* %array, i64 %k.015
+  store i8 0, i8* %v14
+  %v15 = add nuw nsw i64 %k.015, %step
+  %v16 = icmp slt i64 %v15, 8193
+  br i1 %v16, label %loop, label %loopexit
+
+loopexit:                             ; preds = %loop
+  ret i32 0
+}
+
+!0 = !{i64 1, i64 100}

Added: llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+; EPILOG-LABEL: @test
+; EPILOG: for.body:
+; EPILOG: br i1 %niter.ncmp.7, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil{{.*}}:
+
+; PROLOG-LABEL: @test
+; PROLOG: for.body.prol{{.*}}:
+; PROLOG: for.body:
+; PROLOG: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+

Added: llvm/trunk/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'PowerPC' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s
+define void @unroll_default() nounwind {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @unroll_default
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+

Added: llvm/trunk/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,74 @@
+; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -loop-unroll | FileCheck %s
+; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -loop-unroll | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind
+define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_addr #0 {
+entry:
+  %cmp10 = icmp sgt i32 %k, 0
+  br i1 %cmp10, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %wide.trip.count = zext i32 %k to i64
+  %min.iters.check = icmp ult i32 %k, 16
+  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
+
+vector.ph:                                        ; preds = %for.body.lr.ph
+  %n.vec = and i64 %wide.trip.count, 4294967280
+  %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %x, i32 0
+  %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.ind12 = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, %vector.ph ], [ %vec.ind.next13, %vector.body ]
+  %0 = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %vec.ind12
+  %1 = and <16 x i32> %0, %broadcast.splat
+  %2 = icmp eq <16 x i32> %1, zeroinitializer
+  %3 = select <16 x i1> %2, <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
+  %4 = getelementptr inbounds i8, i8* %s, i64 %index
+  %5 = bitcast i8* %4 to <16 x i8>*
+  store <16 x i8> %3, <16 x i8>* %5, align 1
+  %index.next = add i64 %index, 16
+  %vec.ind.next13 = add <16 x i32> %vec.ind12, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  %6 = icmp eq i64 %index.next, %n.vec
+  br i1 %6, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  %cmp.n = icmp eq i64 %n.vec, %wide.trip.count
+  br i1 %cmp.n, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %middle.block, %for.body.lr.ph
+  %indvars.iv.ph = phi i64 [ 0, %for.body.lr.ph ], [ %n.vec, %middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
+  %7 = trunc i64 %indvars.iv to i32
+  %shl = shl i32 1, %7
+  %and = and i32 %shl, %x
+  %tobool = icmp eq i32 %and, 0
+  %conv = select i1 %tobool, i8 48, i8 49
+  %arrayidx = getelementptr inbounds i8, i8* %s, i64 %indvars.iv
+  store i8 %conv, i8* %arrayidx, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %middle.block, %entry
+  %idxprom1 = sext i32 %k to i64
+  %arrayidx2 = getelementptr inbounds i8, i8* %s, i64 %idxprom1
+  store i8 0, i8* %arrayidx2, align 1
+  ret i8* %s
+}
+
+
+; CHECK-LABEL: vector.body
+; CHECK:      shl
+; CHECK-NEXT: and
+; CHECK: shl
+; CHECK-NEXT: and
+; CHECK: label %vector.body
+

Added: llvm/trunk/test/Transforms/LoopUnroll/X86/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/X86/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/X86/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/X86/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/LoopUnroll/X86/mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/X86/mmx.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/X86/mmx.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/X86/mmx.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt < %s -S -loop-unroll | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define x86_mmx @f() #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %phi = phi i32 [ 1, %entry ], [ %add, %for.body ]
+  %add = add i32 %phi, 1
+  %cmp = icmp eq i32 %phi, 0
+  br i1 %cmp, label %exit, label %for.body
+
+exit:                                             ; preds = %for.body
+  %ret = phi x86_mmx [ undef, %for.body ]
+  ; CHECK: %[[ret_ph:.*]] = phi x86_mmx [ undef, %entry
+  ; CHECK: %[[ret_ph1:.*]]  = phi x86_mmx [ undef,
+  ; CHECK: %[[ret:.*]] = phi x86_mmx [ %[[ret_ph]], {{.*}} ], [ %[[ret_ph1]],
+  ; CHECK: ret x86_mmx %[[ret]]
+  ret x86_mmx %ret
+}
+
+attributes #0 = { "target-cpu"="x86-64" }

Added: llvm/trunk/test/Transforms/LoopUnroll/X86/partial.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/X86/partial.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/X86/partial.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/X86/partial.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,130 @@
+; RUN: opt < %s -S -loop-unroll -mcpu=nehalem | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=0 | FileCheck -check-prefix=CHECK-NOUNRL %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i32* noalias nocapture readnone %ip, double %alpha, double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds double, double* %b, i64 %index
+  %1 = bitcast double* %0 to <2 x double>*
+  %wide.load = load <2 x double>, <2 x double>* %1, align 8
+  %.sum9 = or i64 %index, 2
+  %2 = getelementptr double, double* %b, i64 %.sum9
+  %3 = bitcast double* %2 to <2 x double>*
+  %wide.load8 = load <2 x double>, <2 x double>* %3, align 8
+  %4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00>
+  %5 = fadd <2 x double> %wide.load8, <double 1.000000e+00, double 1.000000e+00>
+  %6 = getelementptr inbounds double, double* %a, i64 %index
+  %7 = bitcast double* %6 to <2 x double>*
+  store <2 x double> %4, <2 x double>* %7, align 8
+  %.sum10 = or i64 %index, 2
+  %8 = getelementptr double, double* %a, i64 %.sum10
+  %9 = bitcast double* %8 to <2 x double>*
+  store <2 x double> %5, <2 x double>* %9, align 8
+  %index.next = add i64 %index, 4
+  %10 = icmp eq i64 %index.next, 1600
+  br i1 %10, label %for.end, label %vector.body
+
+; FIXME: We should probably unroll this loop by a factor of 2, but the cost
+; model needs to be fixed to account for instructions likely to be folded
+; as part of an addressing mode.
+; CHECK-LABEL: @foo
+; CHECK-NOUNRL-LABEL: @foo
+
+for.end:                                          ; preds = %vector.body
+  ret void
+}
+
+define void @bar(i32* noalias nocapture readnone %ip, double %alpha, double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %v0 = getelementptr inbounds double, double* %b, i64 %index
+  %v1 = bitcast double* %v0 to <2 x double>*
+  %wide.load = load <2 x double>, <2 x double>* %v1, align 8
+  %v4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00>
+  %v5 = fmul <2 x double> %v4, <double 8.000000e+00, double 8.000000e+00>
+  %v6 = getelementptr inbounds double, double* %a, i64 %index
+  %v7 = bitcast double* %v6 to <2 x double>*
+  store <2 x double> %v5, <2 x double>* %v7, align 8
+  %index.next = add i64 %index, 2
+  %v10 = icmp eq i64 %index.next, 1600
+  br i1 %v10, label %for.end, label %vector.body
+
+; FIXME: We should probably unroll this loop by a factor of 2, but the cost
+; model needs to be fixed to account for instructions likely to be folded
+; as part of an addressing mode.
+
+; CHECK-LABEL: @bar
+; CHECK: fadd
+; CHECK-NEXT: fmul
+; CHECK: fadd
+; CHECK-NEXT: fmul
+
+; CHECK-NOUNRL-LABEL: @bar
+; CHECK-NOUNRL: fadd
+; CHECK-NOUNRL-NEXT: fmul
+; CHECK-NOUNRL-NOT: fadd
+
+for.end:                                          ; preds = %vector.body
+  ret void
+}
+
+define zeroext i16 @test1(i16* nocapture readonly %arr, i32 %n) #0 {
+entry:
+  %cmp25 = icmp eq i32 %n, 0
+  br i1 %cmp25, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i16, i16* %arr, i64 %indvars.iv
+  %0 = load i16, i16* %arrayidx, align 2
+  %mul = shl i16 %0, 1
+  %add = add i16 %mul, %reduction.026
+  %sext = mul i64 %indvars.iv, 12884901888
+  %idxprom3 = ashr exact i64 %sext, 32
+  %arrayidx4 = getelementptr inbounds i16, i16* %arr, i64 %idxprom3
+  %1 = load i16, i16* %arrayidx4, align 2
+  %mul2 = shl i16 %1, 1
+  %add7 = add i16 %add, %mul2
+  %sext28 = mul i64 %indvars.iv, 21474836480
+  %idxprom10 = ashr exact i64 %sext28, 32
+  %arrayidx11 = getelementptr inbounds i16, i16* %arr, i64 %idxprom10
+  %2 = load i16, i16* %arrayidx11, align 2
+  %mul3 = shl i16 %2, 1
+  %add14 = add i16 %add7, %mul3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %reduction.0.lcssa = phi i16 [ 0, %entry ], [ %add14, %for.body ]
+  ret i16 %reduction.0.lcssa
+
+; This loop is too large to be partially unrolled (size=16)
+
+; CHECK-LABEL: @test1
+; CHECK: br
+; CHECK: br
+; CHECK: br
+; CHECK: br
+; CHECK-NOT: br
+
+; CHECK-NOUNRL-LABEL: @test1
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL: br
+; CHECK-NOUNRL-NOT: br
+}
+
+attributes #0 = { nounwind uwtable }
+

Added: llvm/trunk/test/Transforms/LoopUnroll/X86/store_cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/X86/store_cost.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/X86/store_cost.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/X86/store_cost.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,104 @@
+; REQUIRES: asserts
+; RUN: opt -mcpu=core-avx2 -loop-unroll --debug-only=loop-unroll -S -unroll-allow-partial < %s 2>&1 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: Loop Unroll: F[foo] Loop %loop.2.header
+; CHECK: Loop Size = 27
+; CHECK-NOT: UNROLLING loop %loop.2.header
+; CHECK: Loop Unroll: F[foo] Loop %loop.header
+; CHECK:   Loop Size = 25
+; CHECK: UNROLLING loop %loop.header by 2 
+
+define void @foo(i32 * %out) {
+entry:
+  %0 = alloca [1024 x i32]
+  %x0 = alloca [1024 x i32]
+  %x01 = alloca [1024 x i32]
+  %x02 = alloca [1024 x i32]
+  %x03 = alloca [1024 x i32]
+  %x04 = alloca [1024 x i32]
+  %x05 = alloca [1024 x i32]
+  %x06 = alloca [1024 x i32]
+  br label %loop.header
+
+loop.header:
+  %counter = phi i32 [0, %entry], [%inc, %loop.inc]
+  br label %loop.body
+
+loop.body:
+  %ptr = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter
+  store i32 %counter, i32* %ptr
+  %val = add i32 %counter, 5
+  %xptr = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter
+  store i32 %val, i32* %xptr
+  %val1 = add i32 %counter, 6
+  %xptr1 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter
+  store i32 %val1, i32* %xptr1
+  %val2 = add i32 %counter, 7
+  %xptr2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter
+  store i32 %val2, i32* %xptr2
+  %val3 = add i32 %counter, 8
+  %xptr3 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter
+  store i32 %val3, i32* %xptr3
+  %val4 = add i32 %counter, 9
+  %xptr4 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter
+  store i32 %val4, i32* %xptr4
+  %val5 = add i32 %counter, 10
+  %xptr5 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter
+  store i32 %val5, i32* %xptr5
+  br label %loop.inc
+
+loop.inc:
+  %inc = add i32 %counter, 2
+  %1 = icmp sge i32 %inc, 1023
+  br i1 %1, label  %exit.0, label %loop.header
+
+exit.0:
+  %2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 5
+  %3 = load i32, i32* %2
+  store i32 %3, i32 * %out
+  br label %loop.2.header
+
+
+loop.2.header:
+  %counter.2 = phi i32 [0, %exit.0], [%inc.2, %loop.2.inc]
+  br label %loop.2.body
+
+loop.2.body:
+  %ptr.2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter.2
+  store i32 %counter.2, i32* %ptr.2
+  %val.2 = add i32 %counter.2, 5
+  %xptr.2 = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter.2
+  store i32 %val.2, i32* %xptr.2
+  %val1.2 = add i32 %counter.2, 6
+  %xptr1.2 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter.2
+  store i32 %val1, i32* %xptr1.2
+  %val2.2 = add i32 %counter.2, 7
+  %xptr2.2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter.2
+  store i32 %val2, i32* %xptr2.2
+  %val3.2 = add i32 %counter.2, 8
+  %xptr3.2 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter.2
+  store i32 %val3.2, i32* %xptr3.2
+  %val4.2 = add i32 %counter.2, 9
+  %xptr4.2 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter.2
+  store i32 %val4.2, i32* %xptr4.2
+  %val5.2 = add i32 %counter.2, 10
+  %xptr5.2 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter.2
+  store i32 %val5.2, i32* %xptr5.2
+  %xptr6.2 = getelementptr [1024 x i32], [1024 x i32]* %x06, i32 0, i32 %counter.2
+  store i32 %val5.2, i32* %xptr6.2
+  br label %loop.2.inc
+
+loop.2.inc:
+  %inc.2 = add i32 %counter.2, 2
+  %4 = icmp sge i32 %inc.2, 1023
+  br i1 %4, label  %exit.2, label %loop.2.header
+
+exit.2:
+  %x2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 6
+  %x3 = load i32, i32* %x2
+  %out2 = getelementptr i32, i32 * %out, i32 1
+  store i32 %3, i32 * %out2
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/basic.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,48 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+; RUN: opt < %s -passes='require<opt-remark-emit>,unroll' -S | FileCheck %s
+
+
+; This should not unroll since the address of the loop header is taken.
+
+; CHECK-LABEL: @test1(
+; CHECK: store i8* blockaddress(@test1, %l1), i8** %P
+; CHECK: l1:
+; CHECK-NEXT: phi i32
+; rdar://8287027
+define i32 @test1(i8** %P) nounwind ssp {
+entry:
+  store i8* blockaddress(@test1, %l1), i8** %P
+  br label %l1
+
+l1:                                               ; preds = %l1, %entry
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l1 ]
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 3
+  br i1 %exitcond, label %l2, label %l1
+
+l2:                                               ; preds = %l1
+  ret i32 0
+}
+
+; This should not unroll since the call is 'noduplicate'.
+
+; CHECK-LABEL: @test2(
+define i32 @test2(i8** %P) nounwind ssp {
+entry:
+  br label %l1
+
+l1:                                               ; preds = %l1, %entry
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l1 ]
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() noduplicate
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 3
+  br i1 %exitcond, label %l2, label %l1
+
+l2:                                               ; preds = %l1
+  ret i32 0
+; CHECK: }
+}
+
+declare void @f()

Added: llvm/trunk/test/Transforms/LoopUnroll/complete_unroll_profitability_with_assume.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/complete_unroll_profitability_with_assume.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/complete_unroll_profitability_with_assume.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/complete_unroll_profitability_with_assume.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -loop-unroll -unroll-threshold=42 | FileCheck %s --check-prefix=ANALYZE-FULL
+
+; This test is supposed to check that calls to @llvm.assume builtin are not
+; prohibiting the analysis of full unroll profitability in case the cost of the
+; unrolled loop (not accounting for any simplifications done by such unrolling) is
+; higher than some threshold.
+;
+; Ensure that we indeed are testing this code path by verifying that the loop is
+; not unrolled without such analysis:
+
+; RUN: opt -S < %s -loop-unroll -unroll-threshold=42 -unroll-max-iteration-count-to-analyze=2 \
+; RUN:   -unroll-peel-max-count=0  | FileCheck %s --check-prefix=DONT-ANALYZE-FULL
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+define i32 @foo(i32* %a) {
+; ANALYZE-FULL-LABEL: @foo(
+; ANALYZE-FULL-NEXT:  entry:
+; ANALYZE-FULL-NEXT:    br label [[FOR_BODY:%.*]]
+; ANALYZE-FULL:       for.body:
+; ANALYZE-FULL-NEXT:    br i1 true, label [[DO_STORE:%.*]], label [[FOR_NEXT:%.*]]
+; ANALYZE-FULL:       do_store:
+; ANALYZE-FULL-NEXT:    store i32 0, i32* [[A:%.*]]
+; ANALYZE-FULL-NEXT:    br label [[FOR_NEXT]]
+; ANALYZE-FULL:       for.next:
+; ANALYZE-FULL-NEXT:    br i1 true, label [[DO_STORE_1:%.*]], label [[FOR_NEXT_1:%.*]]
+; ANALYZE-FULL:       do_store.1:
+; ANALYZE-FULL-NEXT:    [[GEP_1:%.*]] = getelementptr i32, i32* [[A]], i32 1
+; ANALYZE-FULL-NEXT:    store i32 1, i32* [[GEP_1]]
+; ANALYZE-FULL-NEXT:    br label [[FOR_NEXT_1]]
+; ANALYZE-FULL:       for.next.1:
+; ANALYZE-FULL-NEXT:    br i1 true, label [[DO_STORE_2:%.*]], label [[FOR_NEXT_2:%.*]]
+; ANALYZE-FULL:       do_store.2:
+; ANALYZE-FULL-NEXT:    [[GEP_2:%.*]] = getelementptr i32, i32* [[A]], i32 2
+; ANALYZE-FULL-NEXT:    store i32 2, i32* [[GEP_2]]
+; ANALYZE-FULL-NEXT:    br label [[FOR_NEXT_2]]
+; ANALYZE-FULL:       for.next.2:
+; ANALYZE-FULL-NEXT:    br i1 true, label [[DO_STORE_3:%.*]], label [[FOR_NEXT_3:%.*]]
+; ANALYZE-FULL:       do_store.3:
+; ANALYZE-FULL-NEXT:    [[GEP_3:%.*]] = getelementptr i32, i32* [[A]], i32 3
+; ANALYZE-FULL-NEXT:    store i32 3, i32* [[GEP_3]]
+; ANALYZE-FULL-NEXT:    br label [[FOR_NEXT_3]]
+; ANALYZE-FULL:       for.next.3:
+; ANALYZE-FULL-NEXT:    br i1 false, label [[DO_STORE_4:%.*]], label [[FOR_NEXT_4:%.*]]
+; ANALYZE-FULL:       do_store.4:
+; ANALYZE-FULL-NEXT:    [[GEP_4:%.*]] = getelementptr i32, i32* [[A]], i32 4
+; ANALYZE-FULL-NEXT:    store i32 4, i32* [[GEP_4]]
+; ANALYZE-FULL-NEXT:    br label [[FOR_NEXT_4]]
+; ANALYZE-FULL:       for.next.4:
+; ANALYZE-FULL-NEXT:    br i1 false, label [[DO_STORE_5:%.*]], label [[FOR_NEXT_5:%.*]]
+; ANALYZE-FULL:       do_store.5:
+; ANALYZE-FULL-NEXT:    [[GEP_5:%.*]] = getelementptr i32, i32* [[A]], i32 5
+; ANALYZE-FULL-NEXT:    store i32 5, i32* [[GEP_5]]
+; ANALYZE-FULL-NEXT:    br label [[FOR_NEXT_5]]
+; ANALYZE-FULL:       for.next.5:
+; ANALYZE-FULL-NEXT:    br i1 false, label [[DO_STORE_6:%.*]], label [[FOR_NEXT_6:%.*]]
+; ANALYZE-FULL:       do_store.6:
+; ANALYZE-FULL-NEXT:    [[GEP_6:%.*]] = getelementptr i32, i32* [[A]], i32 6
+; ANALYZE-FULL-NEXT:    store i32 6, i32* [[GEP_6]]
+; ANALYZE-FULL-NEXT:    br label [[FOR_NEXT_6]]
+; ANALYZE-FULL:       for.next.6:
+; ANALYZE-FULL-NEXT:    br i1 false, label [[DO_STORE_7:%.*]], label [[FOR_NEXT_7:%.*]]
+; ANALYZE-FULL:       do_store.7:
+; ANALYZE-FULL-NEXT:    [[GEP_7:%.*]] = getelementptr i32, i32* [[A]], i32 7
+; ANALYZE-FULL-NEXT:    store i32 7, i32* [[GEP_7]]
+; ANALYZE-FULL-NEXT:    br label [[FOR_NEXT_7]]
+; ANALYZE-FULL:       for.next.7:
+; ANALYZE-FULL-NEXT:    br i1 false, label [[DO_STORE_8:%.*]], label [[FOR_NEXT_8:%.*]]
+; ANALYZE-FULL:       do_store.8:
+; ANALYZE-FULL-NEXT:    [[GEP_8:%.*]] = getelementptr i32, i32* [[A]], i32 8
+; ANALYZE-FULL-NEXT:    store i32 8, i32* [[GEP_8]]
+; ANALYZE-FULL-NEXT:    br label [[FOR_NEXT_8]]
+; ANALYZE-FULL:       for.next.8:
+; ANALYZE-FULL-NEXT:    ret i32 9
+;
+; DONT-ANALYZE-FULL-LABEL: @foo(
+; DONT-ANALYZE-FULL-NEXT:  entry:
+; DONT-ANALYZE-FULL-NEXT:    br label [[FOR_BODY:%.*]]
+; DONT-ANALYZE-FULL:       for.body:
+; DONT-ANALYZE-FULL-NEXT:    [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_NEXT:%.*]] ]
+; DONT-ANALYZE-FULL-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
+; DONT-ANALYZE-FULL-NEXT:    [[CMP:%.*]] = icmp ule i32 [[INDVAR]], 20
+; DONT-ANALYZE-FULL-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; DONT-ANALYZE-FULL-NEXT:    [[CMP2:%.*]] = icmp ule i32 [[INDVAR]], 3
+; DONT-ANALYZE-FULL-NEXT:    br i1 [[CMP2]], label [[DO_STORE:%.*]], label [[FOR_NEXT]]
+; DONT-ANALYZE-FULL:       do_store:
+; DONT-ANALYZE-FULL-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[INDVAR]]
+; DONT-ANALYZE-FULL-NEXT:    store i32 [[INDVAR]], i32* [[GEP]]
+; DONT-ANALYZE-FULL-NEXT:    br label [[FOR_NEXT]]
+; DONT-ANALYZE-FULL:       for.next:
+; DONT-ANALYZE-FULL-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INDVAR_NEXT]], 9
+; DONT-ANALYZE-FULL-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEXIT:%.*]]
+; DONT-ANALYZE-FULL:       loopexit:
+; DONT-ANALYZE-FULL-NEXT:    [[INDVAR_NEXT_LCSSA:%.*]] = phi i32 [ [[INDVAR_NEXT]], [[FOR_NEXT]] ]
+; DONT-ANALYZE-FULL-NEXT:    ret i32 [[INDVAR_NEXT_LCSSA]]
+;
+entry:
+  br label %for.body
+for.body:
+  %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %for.next ]
+  %indvar.next = add i32 %indvar, 1
+  %cmp = icmp ule i32 %indvar, 20
+  tail call void @llvm.assume(i1 %cmp)
+  %cmp2 = icmp ule i32 %indvar, 3
+  br i1 %cmp2, label %do_store, label %for.next
+
+do_store:
+  %gep = getelementptr i32, i32* %a, i32 %indvar
+  store i32 %indvar, i32* %gep
+  br label %for.next
+
+for.next:
+  %exitcond = icmp ne i32 %indvar.next, 9
+  br i1 %exitcond, label %for.body, label %loopexit
+loopexit:
+  ret i32 %indvar.next
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/convergent.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/convergent.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/convergent.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/convergent.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,179 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s
+
+declare void @f() convergent
+
+; Although this loop contains a convergent instruction, it should be
+; fully unrolled.
+;
+; CHECK-LABEL: @full_unroll(
+define i32 @full_unroll() {
+entry:
+  br label %l3
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() ;convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 3
+  br i1 %exitcond, label %exit, label %l3
+
+exit:
+  ret i32 0
+}
+
+; This loop contains a convergent instruction, but it should be partially
+; unrolled.  The unroll count is the largest power of 2 that divides the
+; trip multiple (12) -- 4, in this case.
+;
+; CHECK-LABEL: @runtime_unroll(
+define i32 @runtime_unroll(i32 %n) {
+entry:
+  %loop_ctl = mul nsw i32 %n, 12
+  br label %l3
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %loop_ctl
+  br i1 %exitcond, label %exit, label %l3
+
+exit:
+  ret i32 0
+}
+
+; This loop contains a convergent instruction, so its partial unroll
+; count must divide its trip multiple.  This overrides its unroll
+; pragma -- we unroll exactly 8 times, even though 16 is requested.
+; CHECK-LABEL: @pragma_unroll
+define i32 @pragma_unroll(i32 %n) {
+entry:
+  %loop_ctl = mul nsw i32 %n, 24
+  br label %l3, !llvm.loop !0
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %loop_ctl
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !0
+
+exit:
+  ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides trip count 4, the loop unroll should respect the pragma.
+; CHECK-LABEL: @pragma_unroll_divisible_trip_count
+define void @pragma_unroll_divisible_trip_count() {
+entry:
+  br label %l3, !llvm.loop !1
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 4
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+  ret void
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 divides trip multiple 2, the loop unroll should respect the pragma.
+; CHECK-LABEL: @pragma_unroll_divisible_trip_multiple
+define i32 @pragma_unroll_divisible_trip_multiple(i32 %n) {
+entry:
+  %loop_ctl = mul nsw i32 %n, 2
+  br label %l3, !llvm.loop !1
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %loop_ctl
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+  ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 is not known to divide the runtime trip count, the loop is not
+; unrolled, since a remainder is forbidden when unrolling a convergent loop.
+; TODO: Forbidding a remainder when unrolling a convergent loop may be relaxed
+; in the future.
+; CHECK-LABEL: @pragma_unroll_indivisible_runtime_trip_count
+define i32 @pragma_unroll_indivisible_runtime_trip_count(i32 %n) {
+entry:
+  br label %l3, !llvm.loop !1
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+  ret i32 0
+}
+
+; This loop contains a convergent instruction. Since the pragma loop unroll
+; count 2 does not divide trip count 5, the loop is not unrolled by 2,
+; since a remainder is forbidden when unrolling a convergent loop. Instead, the
+; loop gets fully unrolled.
+; TODO: Forbidding a remainder when unrolling a convergent loop may be relaxed
+; in the future.
+; CHECK-LABEL: @pragma_unroll_indivisible_trip_count
+define i32 @pragma_unroll_indivisible_trip_count() {
+entry:
+  br label %l3, !llvm.loop !1
+
+l3:
+  %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+  call void @f() convergent
+  %inc = add nsw i32 %x.0, 1
+  %exitcond = icmp eq i32 %inc, 5
+  br i1 %exitcond, label %exit, label %l3, !llvm.loop !1
+
+exit:
+  ret i32 0
+}
+
+!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
+!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/debug-info.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/debug-info.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/debug-info.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/debug-info.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,75 @@
+; RUN: opt %s -S -o - -loop-unroll | FileCheck %s
+; generated at -O3 from:
+; void f() {
+;   for (int i = 1; i <=32; i <<=2 )
+;     bar(i>>1);
+; }
+source_filename = "/tmp/loop.c"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.13.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @f() local_unnamed_addr #0 !dbg !8 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 1, metadata !12, metadata !DIExpression()), !dbg !15
+  br label %for.body, !dbg !16
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void, !dbg !17
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.04 = phi i32 [ 1, %entry ], [ %shl, %for.body ]
+  tail call void @llvm.dbg.value(metadata i32 %i.04, metadata !12, metadata !DIExpression()), !dbg !15
+  %shr = ashr i32 %i.04, 1, !dbg !18
+
+  ; The loop gets unrolled entirely.
+  ; CHECK: call void @llvm.dbg.value(metadata i32 1, metadata !12, metadata !DIExpression()), !dbg !15
+  ; CHECK: call void @llvm.dbg.value(metadata i32 4, metadata !12, metadata !DIExpression()), !dbg !15
+  ; CHECK: call void @llvm.dbg.value(metadata i32 16, metadata !12, metadata !DIExpression()), !dbg !15
+  ; CHECK: call void @llvm.dbg.value(metadata i32 64, metadata !12, metadata !DIExpression()), !dbg !15
+  
+  %call = tail call i32 (i32, ...) bitcast (i32 (...)* @bar to i32 (i32, ...)*)(i32 %shr) #3, !dbg !20
+  %shl = shl i32 %i.04, 2, !dbg !21
+  tail call void @llvm.dbg.value(metadata i32 %shl, metadata !12, metadata !DIExpression()), !dbg !15
+  %cmp = icmp slt i32 %shl, 33, !dbg !22
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !16, !llvm.loop !23
+}
+
+declare i32 @bar(...) local_unnamed_addr
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #2 = { nounwind readnone speculatable }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 (trunk 317113) (llvm/trunk 317122)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "/tmp/loop.c", directory: "/Data/llvm")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!"clang version 6.0.0 (trunk 317113) (llvm/trunk 317122)"}
+!8 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !9, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !11)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null}
+!11 = !{!12}
+!12 = !DILocalVariable(name: "i", scope: !13, file: !1, line: 2, type: !14)
+!13 = distinct !DILexicalBlock(scope: !8, file: !1, line: 2, column: 3)
+!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!15 = !DILocation(line: 2, column: 12, scope: !13)
+!16 = !DILocation(line: 2, column: 3, scope: !13)
+!17 = !DILocation(line: 4, column: 1, scope: !8)
+!18 = !DILocation(line: 3, column: 10, scope: !19)
+!19 = distinct !DILexicalBlock(scope: !13, file: !1, line: 2, column: 3)
+!20 = !DILocation(line: 3, column: 5, scope: !19)
+!21 = !DILocation(line: 2, column: 29, scope: !19)
+!22 = !DILocation(line: 2, column: 21, scope: !19)
+!23 = distinct !{!23, !16, !24}
+!24 = !DILocation(line: 3, column: 13, scope: !13)

Added: llvm/trunk/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt -disable-loop-unrolling -O1 -S < %s | FileCheck %s
+;
+; Check loop unrolling metadata is honored despite automatic unrolling
+; being disabled in the pass builder.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @forced(
+; CHECK: load
+; CHECK: load
+define void @forced(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !{!"llvm.loop.unroll.enable"},
+                    !{!"llvm.loop.unroll.count", i32 2}}

Added: llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt -loop-unroll -unroll-count=2 -S < %s | FileCheck %s
+;
+; Check that the disable_nonforced loop property is honored by
+; loop unroll.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_nonforced(
+; CHECK: load
+; CHECK-NOT: load
+define void @disable_nonforced(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.disable_nonforced"}}

Added: llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_count.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_count.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_count.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_count.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt -loop-unroll -unroll-count=2 -S < %s | FileCheck %s
+;
+; Check whether the llvm.loop.unroll.count loop property overrides
+; llvm.loop.disable_nonforced.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_nonforced_count(
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+define void @disable_nonforced_count(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.disable_nonforced"}, !{!"llvm.loop.unroll.count", i32 2}}

Added: llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_enable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_enable.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_enable.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_enable.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt -loop-unroll -unroll-count=2 -S < %s | FileCheck %s
+;
+; Check that the llvm.loop.unroll.enable loop property overrides
+; llvm.loop.disable_nonforced.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_nonforced_enable(
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+define void @disable_nonforced_enable(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.disable_nonforced"}, !{!"llvm.loop.unroll.enable"}}

Added: llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_full.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_full.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_full.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/disable_nonforced_full.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt -loop-unroll -S < %s | FileCheck %s
+;
+; Check that the llvm.loop.unroll.full loop property overrides
+; llvm.loop.disable_nonforced.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_nonforced_full(
+; CHECK: store
+; CHECK: store
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+define void @disable_nonforced_full(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.disable_nonforced"}, !{!"llvm.loop.unroll.full"}}

Added: llvm/trunk/test/Transforms/LoopUnroll/ephemeral.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/ephemeral.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/ephemeral.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/ephemeral.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s
+
+; Make sure this loop is completely unrolled...
+; CHECK-LABEL: @test1
+; CHECK: for.body:
+; CHECK-NOT: for.end:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+
+  ; This loop will be completely unrolled, even with these extra instructions,
+  ; but only because they're ephemeral (and, thus, free).
+  %1 = add nsw i32 %0, 2
+  %2 = add nsw i32 %1, 4
+  %3 = add nsw i32 %2, 4
+  %4 = add nsw i32 %3, 4
+  %5 = add nsw i32 %4, 4
+  %6 = add nsw i32 %5, 4
+  %7 = add nsw i32 %6, 4
+  %8 = add nsw i32 %7, 4
+  %9 = add nsw i32 %8, 4
+  %10 = add nsw i32 %9, 4
+  %ca = icmp sgt i32 %10, -7
+  call void @llvm.assume(i1 %ca)
+
+  %add = add nsw i32 %0, %sum.01
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 5
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
+
+declare void @llvm.assume(i1) nounwind
+

Added: llvm/trunk/test/Transforms/LoopUnroll/epilog_const_phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/epilog_const_phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/epilog_const_phi.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/epilog_const_phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,65 @@
+; RUN: opt -S -loop-unroll -unroll-runtime < %s | FileCheck %s
+
+; Epilog unrolling allows the PHI to keep a constant value.
+; For this test, that means the XOR can be deleted after unrolling.
+; Check that we generate an epilogue remainder here.
+
+; CHECK-LABEL: const_phi_val
+; CHECK:  for.body.epil
+
+; Function Attrs: norecurse nounwind uwtable
+define void @const_phi_val(i32 %i0, i32* nocapture %a) {
+entry:
+  %cmp6 = icmp slt i32 %i0, 1000
+  br i1 %cmp6, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %tmp = sext i32 %i0 to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ %tmp, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %s.08 = phi i32 [ 0, %for.body.preheader ], [ %xor, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  store i32 %s.08, i32* %arrayidx, align 4
+  %xor = xor i32 %s.08, 1
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
+; When there is no phi with const coming from preheader,
+; there is no need to do epilogue unrolling.
+
+; CHECK-LABEL: var_phi_val
+; CHECK:  for.body.prol
+
+; Function Attrs: norecurse nounwind uwtable
+define void @var_phi_val(i32 %i0, i32* nocapture %a) {
+entry:
+  %cmp6 = icmp slt i32 %i0, 1000
+  br i1 %cmp6, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %tmp = sext i32 %i0 to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ %tmp, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/followup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/followup.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/followup.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/followup.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,63 @@
+; RUN: opt < %s -S -loop-unroll -unroll-count=2 | FileCheck %s -check-prefixes=COUNT,COMMON
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefixes=EPILOG,COMMON
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
+;
+; Check that followup-attributes are applied after LoopUnroll.
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+!1 = !{!"llvm.loop.unroll.followup_all", !{!"FollowupAll"}}
+!2 = !{!"llvm.loop.unroll.followup_unrolled", !{!"FollowupUnrolled"}}
+!3 = !{!"llvm.loop.unroll.followup_remainder", !{!"FollowupRemainder"}}
+!4 = distinct !{!4, !1, !2, !3}
+
+
+; COMMON-LABEL: @test(
+
+
+; COUNT: br i1 %exitcond.1, label %for.end.loopexit, label %for.body, !llvm.loop ![[LOOP:[0-9]+]]
+
+; COUNT: ![[FOLLOWUP_ALL:[0-9]+]] = !{!"FollowupAll"}
+; COUNT: ![[FOLLOWUP_UNROLLED:[0-9]+]] = !{!"FollowupUnrolled"}
+; COUNT: ![[LOOP]] = distinct !{![[LOOP]], ![[FOLLOWUP_ALL]], ![[FOLLOWUP_UNROLLED]]}
+
+
+; EPILOG: br i1 %niter.ncmp.7, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !llvm.loop ![[LOOP_0:[0-9]+]]
+; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop ![[LOOP_2:[0-9]+]]
+
+; EPILOG: ![[LOOP_0]] = distinct !{![[LOOP_0]], ![[FOLLOWUP_ALL:[0-9]+]], ![[FOLLOWUP_UNROLLED:[0-9]+]]}
+; EPILOG: ![[FOLLOWUP_ALL]] = !{!"FollowupAll"}
+; EPILOG: ![[FOLLOWUP_UNROLLED]] = !{!"FollowupUnrolled"}
+; EPILOG: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[FOLLOWUP_ALL]], ![[FOLLOWUP_REMAINDER:[0-9]+]]}
+; EPILOG: ![[FOLLOWUP_REMAINDER]] = !{!"FollowupRemainder"}
+
+
+; PROLOG:  br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop ![[LOOP_0:[0-9]+]]
+; PROLOG:  br i1 %exitcond.7, label %for.end.loopexit.unr-lcssa, label %for.body, !llvm.loop ![[LOOP_2:[0-9]+]]
+
+; PROLOG: ![[LOOP_0]] = distinct !{![[LOOP_0]], ![[FOLLOWUP_ALL:[0-9]+]], ![[FOLLOWUP_REMAINDER:[0-9]+]]}
+; PROLOG: ![[FOLLOWUP_ALL]] = !{!"FollowupAll"}
+; PROLOG: ![[FOLLOWUP_REMAINDER]] = !{!"FollowupRemainder"}
+; PROLOG: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[FOLLOWUP_ALL]], ![[FOLLOWUP_UNROLLED:[0-9]+]]}
+; PROLOG: ![[FOLLOWUP_UNROLLED]] = !{!"FollowupUnrolled"}

Added: llvm/trunk/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/full-unroll-bad-cost.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,59 @@
+; RUN: opt -S -loop-unroll < %s | FileCheck %s
+; RUN: opt < %s -passes='require<opt-remark-emit>,loop(unroll-full)' -S | FileCheck %s
+
+; LLVM should not try to fully unroll this loop.
+
+declare void @f()
+declare void @g()
+declare void @h()
+
+define void @trivial_loop() {
+; CHECK-LABEL: @trivial_loop(
+ entry:
+  br label %loop
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
+  %idx.inc = add i32 %idx, 1
+  call void @f()
+  call void @g()
+  call void @h()
+  call void @f()
+  call void @g()
+  call void @h()
+  call void @f()
+  call void @g()
+  call void @h()
+  call void @f()
+  call void @g()
+  call void @h()
+  call void @f()
+  call void @g()
+  call void @h()
+  %be = icmp slt i32 %idx, 268435456
+  br i1 %be, label %loop, label %exit
+
+; CHECK: loop:
+; CHECK-NEXT:  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %loop ]
+; CHECK-NEXT:  %idx.inc = add i32 %idx, 1
+; CHECK-NEXT:  call void @f()
+; CHECK-NEXT:  call void @g()
+; CHECK-NEXT:  call void @h()
+; CHECK-NEXT:  call void @f()
+; CHECK-NEXT:  call void @g()
+; CHECK-NEXT:  call void @h()
+; CHECK-NEXT:  call void @f()
+; CHECK-NEXT:  call void @g()
+; CHECK-NEXT:  call void @h()
+; CHECK-NEXT:  call void @f()
+; CHECK-NEXT:  call void @g()
+; CHECK-NEXT:  call void @h()
+; CHECK-NEXT:  call void @f()
+; CHECK-NEXT:  call void @g()
+; CHECK-NEXT:  call void @h()
+; CHECK-NEXT:  %be = icmp slt i32 %idx, 268435456
+; CHECK-NEXT:  br i1 %be, label %loop, label %exit
+
+ exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/full-unroll-crashers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/full-unroll-crashers.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/full-unroll-crashers.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/full-unroll-crashers.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,225 @@
+; Check that we don't crash on corner cases.
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-max-percent-threshold-boost=200 -o /dev/null
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
+
+define void @foo1() {
+entry:
+  br label %for.body
+
+for.body:
+  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %idx = zext i32 undef to i64
+  %add.ptr = getelementptr inbounds i64, i64* null, i64 %idx
+  %inc = add nuw nsw i64 %phi, 1
+  %cmp = icmp ult i64 %inc, 999
+  br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+  ret void
+}
+
+define void @foo2() {
+entry:
+  br label %for.body
+
+for.body:
+  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %x = getelementptr i32, <4 x i32*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %inc = add nuw nsw i64 %phi, 1
+  %cmp = icmp ult i64 %inc, 999
+  br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+  ret void
+}
+
+define void @cmp_undef() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
+  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+  %x1 = load i32, i32* %arrayidx1, align 4
+  %cmp = icmp eq i32 %x1, undef
+  br i1 %cmp, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %iv.1 = add nuw nsw i64 %iv.0, 1
+  %exitcond = icmp eq i64 %iv.1, 10
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc
+  ret void
+}
+
+define void @switch() {
+entry:
+  br label %for.body
+
+for.body:
+  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
+  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+  %x1 = load i32, i32* %arrayidx1, align 4
+  switch i32 %x1, label %l1 [
+  ]
+
+l1:
+  %x2 = add i32 %x1, 2
+  br label %for.inc
+
+for.inc:
+  %iv.1 = add nuw nsw i64 %iv.0, 1
+  %exitcond = icmp eq i64 %iv.1, 10
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define <4 x i32> @vec_load() {
+entry:
+  br label %for.body
+
+for.body:
+  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %vec_phi = phi <4 x i32> [ <i32 0, i32 0, i32 0, i32 0>, %entry ], [ %r, %for.body ]
+  %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %phi
+  %bc = bitcast i32* %arrayidx to <4 x i32>*
+  %x = load <4 x i32>, < 4 x i32>* %bc, align 4
+  %r = add <4 x i32> %x, %vec_phi
+  %inc = add nuw nsw i64 %phi, 1
+  %cmp = icmp ult i64 %inc, 999
+  br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+  ret <4 x i32> %r
+}
+
+define void @ptrtoint_cast() optsize {
+entry:
+  br label %for.body
+
+for.body:
+  br i1 true, label %for.inc, label %if.then
+
+if.then:
+  %arraydecay = getelementptr inbounds [1 x i32], [1 x i32]* null, i64 0, i64 0
+  %x = ptrtoint i32* %arraydecay to i64
+  br label %for.inc
+
+for.inc:
+  br i1 false, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @ptrtoint_cast2() {
+entry:
+  br i1 false, label %for.body.lr.ph, label %exit
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %offset = getelementptr inbounds float, float* null, i32 3
+  %bc = bitcast float* %offset to i64*
+  %inc = add nuw nsw i32 %iv, 1
+  br i1 false, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+@i = external global i32, align 4
+
+define void @folded_not_to_constantint() {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %m = phi i32* [ @i, %entry ], [ %m, %for.inc ]
+  br i1 undef, label %if.else, label %if.then
+
+if.then:
+  unreachable
+
+if.else:
+  %cmp = icmp ult i32* %m, null
+  br i1 %cmp, label %cond.false, label %for.inc
+
+cond.false:
+  unreachable
+
+for.inc:
+  %inc = add nuw nsw i32 %iv, 1
+  %cmp2 = icmp ult i32 %inc, 10
+  br i1 %cmp2, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @index_too_large() {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ -73631599, %entry ], [ %iv.next, %for.inc ]
+  br i1 undef, label %for.body2, label %for.inc
+
+for.body2:
+  %idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
+  %x = load i32, i32* %idx, align 1
+  br label %for.inc
+
+for.inc:
+  %iv.next = add nsw i64 %iv, -1
+  br i1 undef, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @cmp_type_mismatch() {
+entry:
+  br label %for.header
+
+for.header:
+  br label %for.body
+
+for.body:
+  %d = phi i32* [ null, %for.header ]
+  %cmp = icmp eq i32* %d, null
+  br i1 undef, label %for.end, label %for.header
+
+for.end:
+  ret void
+}
+
+define void @load_type_mismatch() {
+entry:
+  br label %for.body
+
+for.body:
+  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
+  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+  %bc = bitcast i32* %arrayidx1 to i64*
+  %x1 = load i64, i64* %bc, align 4
+  %x2 = add i64 10, %x1
+  %iv.1 = add nuw nsw i64 %iv.0, 1
+  %exitcond = icmp eq i64 %iv.1, 10
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,90 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+@weak_constant = weak unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+
+; Though @unknown_global is initialized with constant values, we can't consider
+; it as a constant, so we shouldn't unroll the loop.
+; CHECK-LABEL: @foo
+; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @unknown_global, i64 0, i64 %iv
+define i32 @foo(i32* noalias nocapture readonly %src) { ; 9-iteration multiply-accumulate of %src against @unknown_global
+entry:
+  br label %loop
+
+loop:                                                ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+  %r  = phi i32 [ 0, %entry ], [ %add, %loop ] ; running sum
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %src_element = load i32, i32* %arrayidx, align 4
+  %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @unknown_global, i64 0, i64 %iv ; the expected output still contains this %iv-indexed address
+  %const_array_element = load i32, i32* %array_const_idx, align 4
+  %mul = mul nsw i32 %src_element, %const_array_element
+  %add = add nsw i32 %mul, %r
+  %inc = add nuw nsw i64 %iv, 1
+  %exitcond86.i = icmp eq i64 %inc, 9 ; the body runs for %iv = 0..8
+  br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end:                                            ; preds = %loop
+  %r.lcssa = phi i32 [ %r, %loop ] ; forwards the phi %r, not the last %add
+  ret i32 %r.lcssa
+}
+
+; Similarly, we can't consider 'weak' symbols as a known constant value, so we
+; shouldn't unroll the loop.
+; CHECK-LABEL: @foo2
+; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @weak_constant, i64 0, i64 %iv
+define i32 @foo2(i32* noalias nocapture readonly %src) { ; same loop shape as @foo, but reading @weak_constant
+entry:
+  br label %loop
+
+loop:                                                ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+  %r  = phi i32 [ 0, %entry ], [ %add, %loop ] ; running sum
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %src_element = load i32, i32* %arrayidx, align 4
+  %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @weak_constant, i64 0, i64 %iv ; the expected output still contains this %iv-indexed address
+  %const_array_element = load i32, i32* %array_const_idx, align 4
+  %mul = mul nsw i32 %src_element, %const_array_element
+  %add = add nsw i32 %mul, %r
+  %inc = add nuw nsw i64 %iv, 1
+  %exitcond86.i = icmp eq i64 %inc, 9 ; the body runs for %iv = 0..8
+  br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end:                                            ; preds = %loop
+  %r.lcssa = phi i32 [ %r, %loop ] ; forwards the phi %r, not the last %add
+  ret i32 %r.lcssa
+}
+
+; In this case the loaded value is used only to control a branch.
+; If we missed that, we could've thought that it's unused and unrolling would
+; clean up almost the entire loop. Make sure that we do not unroll such a loop.
+; CHECK-LABEL: @foo3
+; CHECK: br i1 %exitcond, label %loop.end, label %loop.header
+define i32 @foo3(i32* noalias nocapture readonly %src) { ; counts how many of %src[0..8] are zero
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ]
+  %r1  = phi i32 [ 0, %entry ], [ %r3, %loop.latch ] ; running count
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %src_element = load i32, i32* %arrayidx, align 4
+  %cmp = icmp eq i32 0, %src_element ; the only use of the loaded value
+  br i1 %cmp, label %loop.if, label %loop.latch
+
+loop.if:
+  %r2 = add i32 %r1, 1 ; bump the count when the element was zero
+  br label %loop.latch
+
+loop.latch:
+  %r3 = phi i32 [%r1, %loop.header], [%r2, %loop.if]
+  %inc = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %inc, 9 ; the body runs for %iv = 0..8
+  br i1 %exitcond, label %loop.end, label %loop.header
+
+loop.end:
+  %r.lcssa = phi i32 [ %r3, %loop.latch ]
+  ret i32 %r.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,79 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
+
+; If we can figure out the result of a comparison on each iteration, we can
+; resolve the dependent branch. That means that the unrolled version of the
+; loop would have less code, because we don't need not-taken basic blocks there.
+; This test checks that this is taken into consideration.
+; We expect this loop to be unrolled, because the most complicated part of its
+; body (if.then block) is never actually executed.
+; CHECK-LABEL: @branch_folded
+; CHECK-NOT: br i1 %
+; CHECK: ret i32
+define i32 @branch_folded(i32* noalias nocapture readonly %b) { ; 10-iteration loop whose inner branch depends only on @known_constant
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
+  %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
+  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+  %x1 = load i32, i32* %arrayidx1, align 4
+  %cmp = icmp eq i32 %x1, 0 ; every initializer element of @known_constant in this file is 1
+  %iv.1 = add nuw nsw i64 %iv.0, 1
+  br i1 %cmp, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0
+  %x2 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %x2, %r.0
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %r.1 = phi i32 [ %add, %if.then ], [ %x1, %for.body ] ; takes %x1 when %if.then was skipped
+  %exitcond = icmp eq i64 %iv.1, 10 ; the body runs for %iv.0 = 0..9
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc
+  ret i32 %r.1
+}
+
+; Check that we don't crash when we analyze icmp with pointer-typed IV and a
+; pointer.
+; CHECK-LABEL: @ptr_cmp_crash
+; CHECK:   ret void
+define void @ptr_cmp_crash() { ; loop whose induction variable is a pointer into @known_constant
+entry:
+  br label %while.body
+
+while.body:
+  %iv.0 = phi i32* [ getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 0), %entry ], [ %iv.1, %while.body ] ; starts at element 0
+  %iv.1 = getelementptr inbounds i32, i32* %iv.0, i64 1 ; advance by one i32
+  %exitcond = icmp eq i32* %iv.1, getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 9) ; pointer-vs-pointer compare against element 9
+  br i1 %exitcond, label %loop.exit, label %while.body
+
+loop.exit:
+  ret void
+}
+
+; Check that we don't crash when we analyze ptrtoint cast.
+; CHECK-LABEL: @ptrtoint_cast_crash
+; CHECK:   ret void
+define void @ptrtoint_cast_crash(i8 * %a) { ; pointer-IV loop containing a ptrtoint of the induction pointer
+entry:
+  %limit = getelementptr i8, i8* %a, i64 512 ; end pointer: %a + 512 bytes
+  br label %loop.body
+
+loop.body:
+  %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
+  %cast = ptrtoint i8* %iv.0 to i64 ; result has no users
+  %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1 ; advance by one byte
+  %exitcond = icmp ne i8* %iv.1, %limit ; true means keep looping
+  br i1 %exitcond, label %loop.body, label %loop.exit
+
+loop.exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=12 -unroll-max-percent-threshold-boost=400 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
+
+; If a load becomes a constant after loop unrolling, we sometimes can simplify
+; CFG. This test verifies that we handle such cases.
+; After one operand in an instruction is constant-folded and the
+; instruction is simplified, the other operand might become dead.
+; In this test we have:
+; for i in 0..9:
+;   r += A[i] * B[i]
+; A[i] is 0 at almost every iteration, so there is no need to load B[i] in
+; those iterations.
+
+
+; CHECK-LABEL: @unroll_dce
+; CHECK-NOT:   br i1 %exitcond, label %for.end, label %for.body
+define i32 @unroll_dce(i32* noalias nocapture readonly %b) { ; sums @known_constant[i] * %b[i] for i = 0..9
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
+  %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.body ] ; running sum
+  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+  %x1 = load i32, i32* %arrayidx1, align 4 ; the initializer in this file is 0 everywhere except index 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0
+  %x2 = load i32, i32* %arrayidx2, align 4 ; only used by the multiply below
+  %mul = mul i32 %x1, %x2
+  %r.1 = add i32 %mul, %r.0
+  %iv.1 = add nuw nsw i64 %iv.0, 1
+  %exitcond = icmp eq i64 %iv.1, 10 ; the body runs for %iv.0 = 0..9
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %r.1
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; When examining gep-instructions we shouldn't consider them simplified if the
+; corresponding memory access isn't simplified. Doing the opposite might bias
+; our estimate, so that we might decide to unroll even a simple memcpy loop.
+;
+; Thus, the following loop shouldn't be unrolled:
+; CHECK-LABEL: @not_simplified_geps
+; CHECK: br i1 %
+; CHECK: ret void
+define void @not_simplified_geps(i32* noalias %b, i32* noalias %c) { ; copies %b[0..9] into %c[0..9]
+entry:
+  br label %for.body
+
+for.body:
+  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
+  %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %iv.0 ; source element address
+  %x1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %c, i64 %iv.0 ; destination element address
+  store i32 %x1, i32* %arrayidx2, align 4
+  %iv.1 = add nuw nsw i64 %iv.0, 1
+  %exitcond = icmp eq i64 %iv.1, 10 ; the body runs for %iv.0 = 0..9
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=100 -unroll-threshold=10 -unroll-max-percent-threshold-boost=200 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i64 @propagate_loop_phis() { ; loop-carried value %x0 is OR-ed with 1 and then 2 on every iteration
+; CHECK-LABEL: @propagate_loop_phis(
+; CHECK-NOT: br i1
+; CHECK: ret i64 3
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+  %x0 = phi i64 [ 0, %entry ], [ %x2, %loop ] ; 0 on entry, previous %x2 afterwards
+  %x1 = or i64 %x0, 1
+  %x2 = or i64 %x1, 2 ; 0|1|2 = 3 on the first pass and 3|1|2 = 3 after that
+  %inc = add nuw nsw i64 %iv, 1
+  %cond = icmp sge i64 %inc, 10 ; the body runs for %iv = 0..9
+  br i1 %cond, label %loop.end, label %loop
+
+loop.end:
+  %x.lcssa = phi i64 [ %x2, %loop ]
+  ret i64 %x.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/full-unroll-heuristics.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,69 @@
+; In this test we check how heuristics for complete unrolling work. We have
+; three knobs:
+;  1) -unroll-threshold
+;  2) -unroll-percent-dynamic-cost-saved-threshold and
+;  3) -unroll-dynamic-cost-savings-discount
+;
+; They control loop-unrolling according to the following rules:
+;  * If size of unrolled loop exceeds the absolute threshold, we don't unroll
+;    this loop under any circumstances.
+;  * If size of unrolled loop is below the '-unroll-threshold', then we'll
+;    consider this loop as a very small one, and completely unroll it.
+;  * If a loop size is between these two thresholds, we completely unroll it
+;    only if the estimated number of potentially optimized instructions is high
+;    (we specify the minimal percent of such instructions).
+
+; In this particular test-case, complete unrolling will allow later
+; optimizations to remove ~55% of the instructions, the loop body size is 9,
+; and unrolled size is 65.
+
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST1
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=200 | FileCheck %s -check-prefix=TEST2
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3
+
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST1
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=200 | FileCheck %s -check-prefix=TEST2
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3
+
+; Check that these work when the unroller has partial unrolling enabled too.
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST1
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=200 | FileCheck %s -check-prefix=TEST2
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3
+
+; If the absolute threshold is too low, we should not unroll:
+; TEST1: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
+
+; Otherwise, we should:
+; TEST2-NOT: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
+
+; If we do not boost threshold, the unroll will not happen:
+; TEST3: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
+
+; And check that we don't crash when we're not allowed to do any analysis.
+; RUN: opt < %s -loop-unroll -unroll-max-iteration-count-to-analyze=0 -disable-output
+; RUN: opt < %s -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-max-iteration-count-to-analyze=0 -disable-output
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+
+define i32 @foo(i32* noalias nocapture readonly %src) { ; 9-iteration multiply-accumulate of %src against @known_constant
+entry:
+  br label %loop
+
+loop:                                                ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+  %r  = phi i32 [ 0, %entry ], [ %add, %loop ] ; running sum
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %src_element = load i32, i32* %arrayidx, align 4
+  %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv ; the instruction the three test prefixes look for (or its absence)
+  %const_array_element = load i32, i32* %array_const_idx, align 4
+  %mul = mul nsw i32 %src_element, %const_array_element
+  %add = add nsw i32 %mul, %r
+  %inc = add nuw nsw i64 %iv, 1
+  %exitcond86.i = icmp eq i64 %inc, 9 ; the body runs for %iv = 0..8
+  br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end:                                            ; preds = %loop
+  %r.lcssa = phi i32 [ %r, %loop ] ; forwards the phi %r, not the last %add
+  ret i32 %r.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,208 @@
+; RUN: opt -S -loop-unroll < %s | FileCheck %s
+; RUN: opt -S -passes='require<opt-remark-emit>,loop(unroll-full)' < %s | FileCheck %s
+
+; Unroll twice, with first loop exit kept
+; CHECK-LABEL: @s32_max1
+; CHECK: do.body:
+; CHECK:  store
+; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK:  ret void
+; CHECK: do.body.1:
+; CHECK:  store
+; CHECK:  br label %do.end
+define void @s32_max1(i32 %n, i32* %p) { ; do-while storing %i.0 to %p[%i.0], signed bound %n + 1
+entry:
+  %add = add i32 %n, 1 ; loop bound
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; starts at %n
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp slt i32 %i.0, %add ; signed compare on the pre-increment value
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
+
+do.end:
+  ret void
+}
+
+; Unroll thrice, with first loop exit kept
+; CHECK-LABEL: @s32_max2
+; CHECK: do.body:
+; CHECK:  store
+; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK:  ret void
+; CHECK: do.body.1:
+; CHECK:  store
+; CHECK:  store
+; CHECK:  br label %do.end
+define void @s32_max2(i32 %n, i32* %p) { ; do-while storing %i.0 to %p[%i.0], signed bound %n + 2
+entry:
+  %add = add i32 %n, 2 ; loop bound
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; starts at %n
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp slt i32 %i.0, %add ; signed compare on the pre-increment value
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+  ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @s32_maxx
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @s32_maxx(i32 %n, i32 %x, i32* %p) { ; same loop shape, but the signed bound is %n + %x
+entry:
+  %add = add i32 %x, %n ; bound offset comes from the argument %x, not a constant
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; starts at %n
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp slt i32 %i.0, %add ; signed compare on the pre-increment value
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
+
+do.end:
+  ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @s32_max2_unpredictable_exit
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) { ; signed bound %n + 2 plus an extra exit that depends on %x
+entry:
+  %add = add i32 %n, 2 ; loop bound
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ] ; starts at %n
+  %cmp = icmp eq i32 %i.0, %x ; first exit test, against the argument %x
+  br i1 %cmp, label %do.end, label %if.end ; unpredictable
+
+if.end:
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp1 = icmp slt i32 %i.0, %add ; signed compare on the pre-increment value
+  br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+  ret void
+}
+
+; Unroll twice, with first loop exit kept
+; CHECK-LABEL: @u32_max1
+; CHECK: do.body:
+; CHECK:  store
+; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK:  ret void
+; CHECK: do.body.1:
+; CHECK:  store
+; CHECK:  br label %do.end
+define void @u32_max1(i32 %n, i32* %p) { ; do-while storing %i.0 to %p[%i.0], unsigned bound %n + 1
+entry:
+  %add = add i32 %n, 1 ; loop bound
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; starts at %n
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp ult i32 %i.0, %add ; unsigned compare on the pre-increment value
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
+
+do.end:
+  ret void
+}
+
+; Unroll thrice, with first loop exit kept
+; CHECK-LABEL: @u32_max2
+; CHECK: do.body:
+; CHECK:  store
+; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK:  ret void
+; CHECK: do.body.1:
+; CHECK:  store
+; CHECK:  store
+; CHECK:  br label %do.end
+define void @u32_max2(i32 %n, i32* %p) { ; do-while storing %i.0 to %p[%i.0], unsigned bound %n + 2
+entry:
+  %add = add i32 %n, 2 ; loop bound
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; starts at %n
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp ult i32 %i.0, %add ; unsigned compare on the pre-increment value
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+  ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @u32_maxx
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @u32_maxx(i32 %n, i32 %x, i32* %p) { ; same loop shape, but the unsigned bound is %n + %x
+entry:
+  %add = add i32 %x, %n ; bound offset comes from the argument %x, not a constant
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ] ; starts at %n
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp ult i32 %i.0, %add ; unsigned compare on the pre-increment value
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
+
+do.end:
+  ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @u32_max2_unpredictable_exit
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) { ; unsigned bound %n + 2 plus an extra exit that depends on %x
+entry:
+  %add = add i32 %n, 2 ; loop bound
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ] ; starts at %n
+  %cmp = icmp eq i32 %i.0, %x ; first exit test, against the argument %x
+  br i1 %cmp, label %do.end, label %if.end ; unpredictable
+
+if.end:
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp1 = icmp ult i32 %i.0, %add ; unsigned compare on the pre-increment value
+  br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; RUN: opt -S -unroll-runtime -loop-unroll < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;; Check that we don't emit expensive instructions to compute trip
+;; counts when unrolling loops.
+
+define i32 @test(i64 %v12, i8* %array, i64* %loc) { ; zero-fills bytes of %array with a stride loaded from %loc
+; CHECK-LABEL: @test(
+; CHECK-NOT: udiv
+entry:
+  %step = load i64, i64* %loc, !range !0 ; !0 is !{i64 1, i64 100}, defined at the end of this test file
+  br label %loop
+
+loop:                                           ; preds = %entry, %loop
+  %k.015 = phi i64 [ %v15, %loop ], [ %v12, %entry ] ; index starts at %v12
+  %v14 = getelementptr inbounds i8, i8* %array, i64 %k.015
+  store i8 0, i8* %v14
+  %v15 = add nuw nsw i64 %k.015, %step ; advance by the loaded, non-constant stride
+  %v16 = icmp slt i64 %v15, 8193 ; keep looping while the next index is below 8193
+  br i1 %v16, label %loop, label %loopexit
+
+loopexit:                             ; preds = %loop
+  ret i32 0
+}
+
+;; Though SCEV for loop tripcount contains division,
+;; it shouldn't be considered expensive, since the division already
+;; exists in the code and we don't need to expand it once more.
+;; Thus, it shouldn't prevent us from unrolling the loop.
+
+define i32 @test2(i64* %loc, i64 %conv7) { ; countdown loop whose start value is built from two udivs already present in the input
+; CHECK-LABEL: @test2(
+; CHECK: udiv
+; CHECK: udiv
+; CHECK-NOT: udiv
+; CHECK-LABEL: for.body
+entry:
+  %rem0 = load i64, i64* %loc, align 8
+  %ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression
+  br label %bb1
+bb1:
+  %div11 = udiv i64 %ExpensiveComputation, %conv7 ; second division feeding the counter
+  %cmp.i38 = icmp ugt i64 %div11, 1
+  %div12 = select i1 %cmp.i38, i64 %div11, i64 1 ; unsigned max(%div11, 1)
+  br label %for.body
+for.body:
+  %rem1 = phi i64 [ %rem0, %bb1 ], [ %rem2, %for.body ] ; value being iterated
+  %k1 = phi i64 [ %div12, %bb1 ], [ %dec, %for.body ] ; down-counter, starts at %div12
+  %mul1 = mul i64 %rem1, 48271
+  %rem2 = urem i64 %mul1, 2147483647
+  %dec = add i64 %k1, -1
+  %cmp = icmp eq i64 %dec, 0 ; exit once the counter reaches zero
+  br i1 %cmp, label %exit, label %for.body
+exit:
+  %rem3 = phi i64 [ %rem2, %for.body ]
+  store i64 %rem3, i64* %loc, align 8 ; write the final value back to %loc
+  ret i32 0
+}
+
+!0 = !{i64 1, i64 100}

Added: llvm/trunk/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,133 @@
+; REQUIRES: asserts
+; RUN: opt < %s -disable-output -stats -loop-unroll -info-output-file - | FileCheck %s --check-prefix=STATS
+; STATS: 1 loop-unroll - Number of loops unrolled (completely or otherwise)
+; Test that llvm.annotation intrinsics do not count against the loop body size
+; and do not prevent unrolling.
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+@B = common global i32 0, align 4
+
+define void @foo(i32* noalias %A, i32 %B, i32 %C) {
+entry:
+  br label %for.body
+
+; A loop that has a small loop body (except for the annotations) that should be
+; unrolled with the default heuristic. Make sure the extra annotations do not
+; prevent unrolling.
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  ; The real loop.
+  %mul = mul nsw i32 %B, %C
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.01
+  store i32 %mul, i32* %arrayidx, align 4
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp ne i32 %inc, 4 ; true while %inc != 4, so the body runs four times
+
+  ; 100 annotation calls (annot.0 .. annot.99); none of the results is used.
+  %annot.0 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.1 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.2 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.3 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.4 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.5 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.6 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.7 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.8 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.9 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.10 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.11 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.12 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.13 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.14 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.15 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.16 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.17 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.18 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.19 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.20 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.21 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.22 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.23 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.24 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.25 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.26 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.27 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.28 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.29 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.30 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.31 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.32 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.33 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.34 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.35 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.36 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.37 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.38 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.39 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.40 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.41 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.42 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.43 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.44 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.45 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.46 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.47 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.48 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.49 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.50 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.51 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.52 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.53 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.54 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.55 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.56 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.57 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.58 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.59 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.60 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.61 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.62 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.63 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.64 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.65 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.66 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.67 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.68 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.69 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.70 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.71 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.72 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.73 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.74 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.75 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.76 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.77 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.78 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.79 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.80 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.81 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.82 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.83 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.84 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.85 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.86 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.87 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.88 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.89 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.90 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.91 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.92 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.93 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.94 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.95 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.96 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.97 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.98 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.99 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)

Added: llvm/trunk/test/Transforms/LoopUnroll/invalidate_right_loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/invalidate_right_loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/invalidate_right_loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/invalidate_right_loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,51 @@
+; RUN: opt < %s -S -indvars -loop-unroll -verify-dom-info | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that this test doesn't crash because of dangling pointer in SCEV.
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test(i32* %p, i8** %p2, i64* %dest) {
+
+; CHECK-LABEL: @test(
+
+entry:
+  br label %outer.loop
+
+outer.loop:                                           ; preds = %outer.latch, %entry
+  %local_2_ = phi i32 [ 10, %entry ], [ %tmp2, %outer.latch ]
+  %tmp1 = icmp eq i32 %local_2_, 0 ; selects the %exit edge in store.block
+  br label %inner.loop
+
+outer.latch:                                          ; preds = %inner.latch
+  %tmp2 = add i32 %local_2_, 1
+  br label %outer.loop
+
+inner.loop:                                           ; preds = %inner.latch, %outer.loop
+  %local_4_20 = phi i32 [ 7, %outer.loop ], [ %tmp15, %inner.latch ]
+  %tmp6 = icmp eq i32 %local_4_20, 0
+  call void (i1, ...) @llvm.experimental.guard(i1 %tmp6) [ "deopt"() ]
+  br label %innermost.loop
+
+store.block:                                          ; preds = %innermost.loop
+  store i64 %tmp20, i64* %dest, align 8
+  br i1 %tmp1, label %exit, label %inner.latch
+
+inner.latch:                                   ; preds = %store.block
+  %tmp15 = add i32 %local_4_20, 4
+  %tmp16 = icmp sgt i32 %tmp15, 263 ; leave the inner loop once %tmp15 exceeds 263
+  br i1 %tmp16, label %outer.latch, label %inner.loop
+
+innermost.loop:                                          ; preds = %innermost.loop, %inner.loop
+  %tmp17 = phi i64 [ 0, %inner.loop ], [ %tmp20, %innermost.loop ]
+  %local_6_51 = phi i32 [ 1, %inner.loop ], [ %tmp21, %innermost.loop ]
+  %ze = zext i32 %local_6_51 to i64
+  %tmp20 = add i64 %tmp17, %ze ; running sum of the counter values
+  %tmp21 = add nuw nsw i32 %local_6_51, 1
+  %tmp22 = icmp ugt i32 %local_6_51, 5 ; compares the pre-increment counter
+  br i1 %tmp22, label %store.block, label %innermost.loop
+
+exit:                                           ; preds = %store.block
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -pass-remarks-with-hotness -unroll-count=16 2>&1 | FileCheck -check-prefix=COMPLETE-UNROLL %s
+; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -pass-remarks-with-hotness -unroll-count=4 2>&1 | FileCheck -check-prefix=PARTIAL-UNROLL %s
+
+; COMPLETE-UNROLL: remark: {{.*}}: completely unrolled loop with 16 iterations (hotness: 300)
+; PARTIAL-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4 {{.*}} (hotness: 300)
+
+define i32 @sum() !prof !0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %s.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %add = add nsw i32 %i.05, 4
+  %call = tail call i32 @baz(i32 %add) #2 ; NOTE(review): attribute group #2 is not defined in this test file
+  %add1 = add nsw i32 %call, %s.06 ; accumulate the results of @baz
+  %inc = add nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, 16 ; exit after 16 iterations (i = 0..15)
+  br i1 %exitcond, label %for.end, label %for.body, !prof !1
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add1
+}
+
+declare i32 @baz(i32)
+
+!0 = !{!"function_entry_count", i64 3}
+!1 = !{!"branch_weights", i32 1, i32 99}

Added: llvm/trunk/test/Transforms/LoopUnroll/loop-remarks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/loop-remarks.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/loop-remarks.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/loop-remarks.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,48 @@
+; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -unroll-count=16 2>&1 | FileCheck -check-prefix=COMPLETE-UNROLL %s
+; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -unroll-count=4 2>&1 | FileCheck -check-prefix=PARTIAL-UNROLL %s
+; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -unroll-count=4 -unroll-runtime=true -unroll-remainder 2>&1 | FileCheck %s --check-prefix=RUNTIME-UNROLL
+
+; COMPLETE-UNROLL: remark: {{.*}}: completely unrolled loop with 16 iterations
+; PARTIAL-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4
+; RUNTIME-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4
+
+define i32 @sum() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %s.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %add = add nsw i32 %i.05, 4
+  %call = tail call i32 @baz(i32 %add) #2 ; NOTE(review): attribute group #2 is not defined in this test file
+  %add1 = add nsw i32 %call, %s.06 ; accumulate the results of @baz
+  %inc = add nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, 16 ; exit after 16 iterations (i = 0..15)
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add1
+}
+
+; RUNTIME-UNROLL-NOT: remark: {{.*}}: completely unrolled loop with 3 iterations
+; RUNTIME-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4
+
+define i32 @runtime(i32 %n) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %s.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ]
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %add = add nsw i32 %i.05, 4
+  %call = tail call i32 @baz(i32 %add) #2 ; NOTE(review): attribute group #2 is not defined in this test file
+  %add1 = add nsw i32 %call, %s.06 ; accumulate the results of @baz
+  %inc = add nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, %n ; exit bound is the function argument, not a constant
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add1
+}
+
+declare i32 @baz(i32)
\ No newline at end of file

Added: llvm/trunk/test/Transforms/LoopUnroll/not-rotated.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/not-rotated.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/not-rotated.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/not-rotated.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; PR28103
+; Bail out if the two successors are not the header
+; and another bb outside of the loop. This case is not
+; properly handled by LoopUnroll, currently.
+
+; RUN: opt -loop-unroll -verify-dom-info %s
+; REQUIRES: asserts
+
+define void @tinkywinky(i1 %patatino) {
+entry:
+  br label %header1
+header1:
+  %indvars.iv = phi i64 [ 1, %body2 ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1 ; true only when arriving from %entry (value 0)
+  br i1 %exitcond, label %body1, label %exit
+body1:
+  br i1 %patatino, label %body2, label %sink
+body2:
+  br i1 %patatino, label %header1, label %body3 ; successors: the header, or %body3 which returns here via %sink
+body3:
+  br label %sink
+sink:
+  br label %body2
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt < %s -S -unroll-partial-threshold=20 -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-partial-threshold=20 -unroll-threshold=20 -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s
+;
+; Also check that the simple unroller doesn't allow the partial unrolling.
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-partial-threshold=20 -unroll-threshold=20 -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-max-percent-threshold-boost=100 | FileCheck %s --check-prefix=CHECK-NO-UNROLL
+
+; The loop trip count is 9. However, unroll factors 3 or 9 exceed the given threshold.
+; The test checks that we choose a smaller, power-of-two, unroll count and do not give up on unrolling.
+
+; CHECK: for.body:
+; CHECK: store
+; CHECK: for.body.1:
+; CHECK: store
+
+; CHECK-NO-UNROLL: for.body:
+; CHECK-NO-UNROLL: store
+; CHECK-NO-UNROLL-NOT: store
+
+define void @foo(i32* nocapture %a, i32* nocapture readonly %b) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ] ; induction variable, starts at 1
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %ld = load i32, i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %ld to i64 ; the value loaded from %b is used as the index into %a
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
+  %st = trunc i64 %indvars.iv to i32
+  store i32 %st, i32* %arrayidx2, align 4 ; a[b[iv]] = iv
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 10 ; iv runs 1..9, i.e. a constant trip count of 9
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-conditions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/peel-loop-conditions.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-conditions.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-conditions.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,645 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -verify-dom-info | FileCheck %s
+
+declare void @f1()
+declare void @f2()
+
+; Check that we can peel off iterations that make conditions true.
+define void @test1(i32 %k) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK:       for.body.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
+; CHECK:       for.body.peel:
+; CHECK-NEXT:    [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2
+; CHECK-NEXT:    br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]]
+; CHECK:       if.else.peel:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL:%.*]]
+; CHECK:       if.then.peel:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL]]
+; CHECK:       for.inc.peel:
+; CHECK-NEXT:    [[INC_PEEL:%.*]] = add nsw i32 0, 1
+; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK:       for.body.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL2:%.*]]
+; CHECK:       for.body.peel2:
+; CHECK-NEXT:    [[CMP1_PEEL3:%.*]] = icmp ult i32 [[INC_PEEL]], 2
+; CHECK-NEXT:    br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL5:%.*]], label [[IF_ELSE_PEEL4:%.*]]
+; CHECK:       if.else.peel4:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL6:%.*]]
+; CHECK:       if.then.peel5:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL6]]
+; CHECK:       for.inc.peel6:
+; CHECK-NEXT:    [[INC_PEEL7:%.*]] = add nsw i32 [[INC_PEEL]], 1
+; CHECK-NEXT:    [[CMP_PEEL8:%.*]] = icmp slt i32 [[INC_PEEL7]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL8]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK:       for.body.peel.next1:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT9:%.*]]
+; CHECK:       for.body.peel.next9:
+; CHECK-NEXT:    br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]]
+; CHECK:       for.body.lr.ph.peel.newph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[INC_PEEL7]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}}
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %cmp1 = icmp ult i32 %i.05, 2 ; true only for i = 0 and i = 1
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+if.else:
+  call void @f2()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k ; continue while the next index is below %k
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1
+
+for.end:
+  ret void
+}
+
+!1 = distinct !{!1}
+
+; Check we peel off the maximum number of iterations that make conditions true.
+define void @test2(i32 %k) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK:       for.body.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
+; CHECK:       for.body.peel:
+; CHECK-NEXT:    [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2
+; CHECK-NEXT:    br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]]
+; CHECK:       if.else.peel:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[IF2_PEEL:%.*]]
+; CHECK:       if.then.peel:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[IF2_PEEL]]
+; CHECK:       if2.peel:
+; CHECK-NEXT:    [[CMP2_PEEL:%.*]] = icmp ult i32 0, 4
+; CHECK-NEXT:    br i1 [[CMP2_PEEL]], label [[IF_THEN2_PEEL:%.*]], label [[FOR_INC_PEEL:%.*]]
+; CHECK:       if.then2.peel:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL]]
+; CHECK:       for.inc.peel:
+; CHECK-NEXT:    [[INC_PEEL:%.*]] = add nsw i32 0, 1
+; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK:       for.body.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL2:%.*]]
+; CHECK:       for.body.peel2:
+; CHECK-NEXT:    [[CMP1_PEEL3:%.*]] = icmp ult i32 [[INC_PEEL]], 2
+; CHECK-NEXT:    br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL5:%.*]], label [[IF_ELSE_PEEL4:%.*]]
+; CHECK:       if.else.peel4:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[IF2_PEEL6:%.*]]
+; CHECK:       if.then.peel5:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[IF2_PEEL6]]
+; CHECK:       if2.peel6:
+; CHECK-NEXT:    [[CMP2_PEEL7:%.*]] = icmp ult i32 [[INC_PEEL]], 4
+; CHECK-NEXT:    br i1 [[CMP2_PEEL7]], label [[IF_THEN2_PEEL8:%.*]], label [[FOR_INC_PEEL9:%.*]]
+; CHECK:       if.then2.peel8:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL9]]
+; CHECK:       for.inc.peel9:
+; CHECK-NEXT:    [[INC_PEEL10:%.*]] = add nsw i32 [[INC_PEEL]], 1
+; CHECK-NEXT:    [[CMP_PEEL11:%.*]] = icmp slt i32 [[INC_PEEL10]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL11]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK:       for.body.peel.next1:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL13:%.*]]
+; CHECK:       for.body.peel13:
+; CHECK-NEXT:    [[CMP1_PEEL14:%.*]] = icmp ult i32 [[INC_PEEL10]], 2
+; CHECK-NEXT:    br i1 [[CMP1_PEEL14]], label [[IF_THEN_PEEL16:%.*]], label [[IF_ELSE_PEEL15:%.*]]
+; CHECK:       if.else.peel15:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[IF2_PEEL17:%.*]]
+; CHECK:       if.then.peel16:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[IF2_PEEL17]]
+; CHECK:       if2.peel17:
+; CHECK-NEXT:    [[CMP2_PEEL18:%.*]] = icmp ult i32 [[INC_PEEL10]], 4
+; CHECK-NEXT:    br i1 [[CMP2_PEEL18]], label [[IF_THEN2_PEEL19:%.*]], label [[FOR_INC_PEEL20:%.*]]
+; CHECK:       if.then2.peel19:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL20]]
+; CHECK:       for.inc.peel20:
+; CHECK-NEXT:    [[INC_PEEL21:%.*]] = add nsw i32 [[INC_PEEL10]], 1
+; CHECK-NEXT:    [[CMP_PEEL22:%.*]] = icmp slt i32 [[INC_PEEL21]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL22]], label [[FOR_BODY_PEEL_NEXT12:%.*]], label [[FOR_END]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK:       for.body.peel.next12:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL24:%.*]]
+; CHECK:       for.body.peel24:
+; CHECK-NEXT:    [[CMP1_PEEL25:%.*]] = icmp ult i32 [[INC_PEEL21]], 2
+; CHECK-NEXT:    br i1 [[CMP1_PEEL25]], label [[IF_THEN_PEEL27:%.*]], label [[IF_ELSE_PEEL26:%.*]]
+; CHECK:       if.else.peel26:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[IF2_PEEL28:%.*]]
+; CHECK:       if.then.peel27:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[IF2_PEEL28]]
+; CHECK:       if2.peel28:
+; CHECK-NEXT:    [[CMP2_PEEL29:%.*]] = icmp ult i32 [[INC_PEEL21]], 4
+; CHECK-NEXT:    br i1 [[CMP2_PEEL29]], label [[IF_THEN2_PEEL30:%.*]], label [[FOR_INC_PEEL31:%.*]]
+; CHECK:       if.then2.peel30:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL31]]
+; CHECK:       for.inc.peel31:
+; CHECK-NEXT:    [[INC_PEEL32:%.*]] = add nsw i32 [[INC_PEEL21]], 1
+; CHECK-NEXT:    [[CMP_PEEL33:%.*]] = icmp slt i32 [[INC_PEEL32]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL33]], label [[FOR_BODY_PEEL_NEXT23:%.*]], label [[FOR_END]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK:       for.body.peel.next23:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT34:%.*]]
+; CHECK:       for.body.peel.next34:
+; CHECK-NEXT:    br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]]
+; CHECK:       for.body.lr.ph.peel.newph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[INC_PEEL32]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[IF2:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[IF2]]
+; CHECK:       if2:
+; CHECK-NEXT:    br i1 false, label [[IF_THEN2:%.*]], label [[FOR_INC]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}}
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %cmp1 = icmp ult i32 %i.05, 2 ; true only for i = 0 and i = 1
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  call void @f1()
+  br label %if2
+
+if.else:
+  call void @f2()
+  br label %if2
+
+if2:
+  %cmp2 = icmp ult i32 %i.05, 4 ; true for i = 0..3; the larger of the two bounds
+  br i1 %cmp2, label %if.then2, label %for.inc
+
+if.then2:
+  call void @f1()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k ; continue while the next index is below %k
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !2
+
+for.end:
+  ret void
+}
+
+!2 = distinct !{!2}
+
+; Check that we can peel off iterations that make a condition false.
+define void @test3(i32 %k) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK:       for.body.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
+; CHECK:       for.body.peel:
+; CHECK-NEXT:    [[CMP1_PEEL:%.*]] = icmp ugt i32 0, 2
+; CHECK-NEXT:    br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]]
+; CHECK:       if.else.peel:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL:%.*]]
+; CHECK:       if.then.peel:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL]]
+; CHECK:       for.inc.peel:
+; CHECK-NEXT:    [[INC_PEEL:%.*]] = add nsw i32 0, 1
+; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK:       for.body.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL2:%.*]]
+; CHECK:       for.body.peel2:
+; CHECK-NEXT:    [[CMP1_PEEL3:%.*]] = icmp ugt i32 [[INC_PEEL]], 2
+; CHECK-NEXT:    br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL5:%.*]], label [[IF_ELSE_PEEL4:%.*]]
+; CHECK:       if.else.peel4:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL6:%.*]]
+; CHECK:       if.then.peel5:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL6]]
+; CHECK:       for.inc.peel6:
+; CHECK-NEXT:    [[INC_PEEL7:%.*]] = add nsw i32 [[INC_PEEL]], 1
+; CHECK-NEXT:    [[CMP_PEEL8:%.*]] = icmp slt i32 [[INC_PEEL7]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL8]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK:       for.body.peel.next1:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL10:%.*]]
+; CHECK:       for.body.peel10:
+; CHECK-NEXT:    [[CMP1_PEEL11:%.*]] = icmp ugt i32 [[INC_PEEL7]], 2
+; CHECK-NEXT:    br i1 [[CMP1_PEEL11]], label [[IF_THEN_PEEL13:%.*]], label [[IF_ELSE_PEEL12:%.*]]
+; CHECK:       if.else.peel12:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL14:%.*]]
+; CHECK:       if.then.peel13:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC_PEEL14]]
+; CHECK:       for.inc.peel14:
+; CHECK-NEXT:    [[INC_PEEL15:%.*]] = add nsw i32 [[INC_PEEL7]], 1
+; CHECK-NEXT:    [[CMP_PEEL16:%.*]] = icmp slt i32 [[INC_PEEL15]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL16]], label [[FOR_BODY_PEEL_NEXT9:%.*]], label [[FOR_END]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK:       for.body.peel.next9:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT17:%.*]]
+; CHECK:       for.body.peel.next17:
+; CHECK-NEXT:    br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]]
+; CHECK:       for.body.lr.ph.peel.newph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[INC_PEEL15]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    br i1 true, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}}
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %cmp1 = icmp ugt i32 %i.05, 2
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+if.else:
+  call void @f2()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !3
+
+for.end:
+  ret void
+}
+
+!3 = distinct !{!3}
+
+; Test that we only peel off iterations if it simplifies a condition in the
+; loop body after peeling at most MaxPeelCount iterations.
+define void @test4(i32 %k) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[I_05]], 9999
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %cmp1 = icmp ugt i32 %i.05, 9999
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; In this case we cannot peel the inner loop, because the condition involves
+; the outer induction variable.
+define void @test5(i32 %k) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  for.body.lr.ph:
+; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
+; CHECK:       outer.header:
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[J]], 2
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
+; CHECK:       outer.inc:
+; CHECK-NEXT:    [[J_INC]] = add nsw i32 [[J]], 1
+; CHECK-NEXT:    [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
+; CHECK-NEXT:    br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+for.body.lr.ph:
+  br label %outer.header
+
+outer.header:
+  %j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
+  %cmp1 = icmp ult i32 %j, 2
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+if.else:
+  call void @f2()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %outer.inc
+
+outer.inc:
+  %j.inc = add nsw i32 %j, 1
+  %outer.cmp = icmp slt i32 %j.inc, %k
+  br i1 %outer.cmp, label %outer.header, label %for.end
+
+
+for.end:
+  ret void
+}
+
+; In this test, the condition involves 2 AddRecs. Without evaluating both
+; AddRecs, we cannot prove that the condition becomes known in the loop body
+; after peeling.
+define void @test6(i32 %k) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 4, [[ENTRY]] ], [ [[J_INC:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[I_05]], [[J]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_05]], 2
+; CHECK-NEXT:    [[J_INC]] = add nsw i32 [[J]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %j = phi i32 [ 4, %entry ], [ %j.inc, %for.inc ]
+  %cmp1 = icmp ult i32 %i.05, %j
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+if.else:
+  call void @f2()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 2
+  %j.inc = add nsw i32 %j, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @test7(i32 %k) {
+; FIXME: Could simplify loop body by peeling one additional iteration after
+;        i != 3 becomes false.
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[I_05]], 3
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %cmp1 = icmp ne i32 %i.05, 3
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @test8(i32 %k) {
+; FIXME: Could simplify loop body by peeling one additional iteration after
+;        i == 3 becomes true.
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[I_05]], 3
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %cmp1 = icmp eq i32 %i.05, 3
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Comparison with non-monotonic predicate due to possible wrapping, loop
+; body cannot be simplified.
+define void @test9(i32 %k) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  for.body.lr.ph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[I_05]], 3
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %cmp1 = icmp slt i32 %i.05, 3
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+for.inc:
+  %inc = add i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-irreducible.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/peel-loop-irreducible.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-irreducible.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-irreducible.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=1 | FileCheck %s
+
+; Check we don't peel loops where the latch is not the exiting block.
+; CHECK-LABEL: @invariant_backedge_irreducible
+; CHECK: entry:
+; CHECK: br label %header
+; CHECK-NOT: peel
+; CHECK: header:
+; CHECK: br i1 {{.*}} label %latch, label %exiting
+; CHECK: latch:
+; CHECK: br i1 {{.*}} label %header, label %exiting
+; CHECK: exiting:
+; CHECK: br i1 {{.*}} label %latch, label %exit
+
+define i32 @invariant_backedge_irreducible(i32 %a, i32 %b) {   ; %a and %b are never referenced in the body
+entry:
+  br label %header
+
+header:                                          ; entered from %entry and, via the backedge, from %latch
+  %i = phi i32 [ 0, %entry ], [ %inc, %latch ]
+  %cmp.phi = phi i1 [ false, %entry ], [ %cmp, %latch ]
+  br i1 %cmp.phi, label %latch, label %exiting   ; %cmp.phi is false when coming from %entry, so that path goes to %exiting
+
+latch:                                           ; reached from both %header and %exiting
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+  br i1 %cmp, label %header, label %exiting      ; the only edge back to %header; otherwise continue in %exiting
+
+exiting:                                         ; reached from both %header and %latch
+  %cmp.exiting = phi i1 [ %cmp.phi, %header ], [ %cmp, %latch ]
+  br i1 %cmp.exiting, label %latch, label %exit  ; the only edge to %exit; may also jump back into %latch
+
+exit:
+  ret i32 0
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-negative.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/peel-loop-negative.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-negative.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-negative.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=800 -unroll-peel-max-count=0 | FileCheck %s
+
+; We should not peel this loop even though we can, because the max count is set
+; to zero.
+define i32 @invariant_backedge_neg_1(i32 %a, i32 %b) {
+; CHECK-LABEL: @invariant_backedge_neg_1
+; CHECK-NOT:   loop.peel{{.*}}:
+; CHECK:       loop:
+; CHECK:         %i = phi
+; CHECK:         %sum = phi
+; CHECK:         %plus = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %b, %loop ]   ; %a on entry, %b on every backedge
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,199 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 -unroll-allow-peeling=false | FileCheck %s --check-prefix=DISABLE
+
+define i32 @invariant_backedge_1(i32 %a, i32 %b) {
+; CHECK-LABEL: @invariant_backedge_1
+; CHECK-NOT:     %plus = phi
+; CHECK:       loop.peel:
+; CHECK:       loop:
+; CHECK:         %i = phi
+; CHECK:         %sum = phi
+; DISABLE-LABEL: @invariant_backedge_1
+; DISABLE-NOT: loop.peel:
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %b, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @invariant_backedge_2(i32 %a, i32 %b) {
+; This loop should be peeled twice because it has a Phi which becomes invariant
+; starting from 3rd iteration.
+; CHECK-LABEL: @invariant_backedge_2
+; CHECK:       loop.peel{{.*}}:
+; CHECK:       loop.peel{{.*}}:
+; CHECK:         %i = phi
+; CHECK:         %sum = phi
+; CHECK-NOT:     %half.inv = phi
+; CHECK-NOT:     %plus = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @invariant_backedge_3(i32 %a, i32 %b) {
+; This loop should be peeled thrice because it has a Phi which becomes invariant
+; starting from 4th iteration.
+; CHECK-LABEL: @invariant_backedge_3
+; CHECK:       loop.peel{{.*}}:
+; CHECK:       loop.peel{{.*}}:
+; CHECK:       loop.peel{{.*}}:
+; CHECK:         %i = phi
+; CHECK:         %sum = phi
+; CHECK-NOT:     %half.inv = phi
+; CHECK-NOT:     %half.inv.2 = phi
+; CHECK-NOT:     %plus = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+  %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @invariant_backedge_limited_by_size(i32 %a, i32 %b) {
+; This loop would normally be peeled thrice because it has a Phi which becomes
+; invariant starting from the 4th iteration, but the size of the loop only
+; allows us to peel twice because we are restricted to 30 instructions in the
+; resulting code. Thus, the %plus Phi node should stay in the loop even though
+; its backedge input is an invariant.
+; CHECK-LABEL: @invariant_backedge_limited_by_size
+; CHECK:       loop.peel{{.*}}:
+; CHECK:       loop.peel{{.*}}:
+; CHECK:         %i = phi
+; CHECK:         %sum = phi
+; CHECK:         %plus = phi i32 [ %a, {{.*}} ], [ %b, %loop ]
+; CHECK-NOT:     %half.inv = phi
+; CHECK-NOT:     %half.inv.2 = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+  %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+  %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  %incsum2 = add i32 %incsum, %plus
+  %incsum3 = add i32 %incsum, %plus
+  %incsum4 = add i32 %incsum, %plus
+  %incsum5 = add i32 %incsum, %plus
+  %incsum6 = add i32 %incsum, %plus
+  %incsum7 = add i32 %incsum, %plus
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+; Peeling should fail due to method size.
+define i32 @invariant_backedge_negative(i32 %a, i32 %b) {
+; CHECK-LABEL: @invariant_backedge_negative
+; CHECK-NOT:   loop.peel{{.*}}:
+; CHECK:       loop:
+; CHECK:         %i = phi
+; CHECK:         %sum = phi
+; CHECK:         %plus = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %sum = phi i32 [ 0, %entry ], [ %incsum2, %loop ]
+  %plus = phi i32 [ %a, %entry ], [ %b, %loop ]
+
+  %incsum = add i32 %sum, %plus
+  %incsum2 = add i32 %incsum, %plus
+  %incsum3 = add i32 %incsum, %plus
+  %incsum4 = add i32 %incsum, %plus
+  %incsum5 = add i32 %incsum, %plus
+  %incsum6 = add i32 %incsum, %plus
+  %incsum7 = add i32 %incsum, %plus
+  %incsum8 = add i32 %incsum, %plus
+  %incsum9 = add i32 %incsum, %plus
+  %incsum10 = add i32 %incsum, %plus
+  %incsum11 = add i32 %incsum, %plus
+  %incsum12 = add i32 %incsum, %plus
+  %incsum13 = add i32 %incsum, %plus
+  %incsum14 = add i32 %incsum, %plus
+  %incsum15 = add i32 %incsum, %plus
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum
+}
+
+define i32 @cycled_phis(i32 %a, i32 %b) {
+; Make sure that we neither crash on nor peel a loop whose Phis form a cycle.
+; TODO: Actually this loop should be partially unrolled with factor 2.
+; CHECK-LABEL: @cycled_phis
+; CHECK-NOT:   loop.peel{{.*}}:
+; CHECK:       loop:
+; CHECK:         %i = phi
+; CHECK:         %phi.a = phi
+; CHECK:         %phi.b = phi
+; CHECK:         %sum = phi
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %phi.a = phi i32 [ %a, %entry ], [ %phi.b, %loop ]   ; backedge value is %phi.b ...
+  %phi.b = phi i32 [ %b, %entry ], [ %phi.a, %loop ]   ; ... and vice versa: the pair swaps every iteration
+  %sum = phi i32 [ 0, %entry], [ %incsum, %loop ]
+  %incsum = add i32 %sum, %phi.a
+  %inc = add i32 %i, 1
+  %cmp = icmp slt i32 %i, 1000
+
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %sum                                         ; %sum, not %incsum: the last add is not returned
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-pgo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,110 @@
+; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll 2>&1 | FileCheck %s
+; RUN: opt < %s -S -debug-only=loop-unroll -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s
+; Confirm that peeling is disabled if the number of counts required to reach
+; the hot percentile is above the threshold.
+; RUN: opt < %s -S -profile-summary-huge-working-set-size-threshold=9 -debug-only=loop-unroll -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s --check-prefix=NOPEEL
+; REQUIRES: asserts
+
+; Make sure we use the profile information correctly to peel off 3 iterations
+; from the loop, and update the branch weights for the peeled loop properly.
+
+; CHECK: Loop Unroll: F[basic]
+; CHECK: PEELING loop %for.body with iteration count 3!
+; CHECK: Loop Unroll: F[optsize]
+; CHECK-NOT: PEELING
+
+; Confirm that no peeling occurs when we are performing full unrolling.
+; RUN: opt < %s -S -debug-only=loop-unroll -passes='require<opt-remark-emit>,loop(unroll-full)' 2>&1 | FileCheck %s --check-prefix=NOPEEL
+; NOPEEL-NOT: PEELING
+
+; CHECK-LABEL: @basic
+; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !15
+; CHECK: [[NEXT0]]:
+; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !16
+; CHECK: [[NEXT1]]:
+; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !17
+; CHECK: [[NEXT2]]:
+; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !18
+
+define void @basic(i32* %p, i32 %k) #0 !prof !15 {
+entry:
+  %cmp3 = icmp slt i32 0, %k
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1
+  store i32 %i.05, i32* %p.addr.04, align 4
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !prof !16
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}
+
+; We don't want to peel loops when optimizing for size.
+; CHECK-LABEL: @optsize
+; CHECK: for.body.lr.ph:
+; CHECK-NEXT: br label %for.body
+; CHECK: for.body:
+; CHECK-NOT: br
+; CHECK: br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+define void @optsize(i32* %p, i32 %k) #1 !prof !15 {
+entry:
+  %cmp3 = icmp slt i32 0, %k
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1
+  store i32 %i.05, i32* %p.addr.04, align 4
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !prof !16
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind optsize }
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10}
+!5 = !{!"MaxCount", i64 3}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 3}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 3, i32 2}
+!13 = !{i32 999000, i64 1, i32 10}
+!14 = !{i32 999999, i64 1, i32 10}
+!15 = !{!"function_entry_count", i64 1}
+!16 = !{!"branch_weights", i32 3001, i32 1001}
+
+;CHECK: !15 = !{!"branch_weights", i32 900, i32 101}
+;CHECK: !16 = !{!"branch_weights", i32 540, i32 360}
+;CHECK: !17 = !{!"branch_weights", i32 162, i32 378}
+;CHECK: !18 = !{!"branch_weights", i32 1399, i32 162}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-scev-invalidate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/peel-loop-scev-invalidate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-scev-invalidate.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-scev-invalidate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=1 -verify-scev -verify-dom-info | FileCheck %s
+
+
+define void @test1(i32 %k) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
+; CHECK:       for.body.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
+; CHECK:       for.body.peel:
+; CHECK-NEXT:    [[INC_PEEL:%.*]] = add nsw i32 0, 1
+; CHECK-NEXT:    [[CMP_PEEL:%.*]] = icmp ult i32 0, [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY_PEEL_NEXT1:%.*]]
+; CHECK:       for.body.peel.next1:
+; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
+; CHECK:       entry.peel.newph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ [[INC_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[I_05]], [[K]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !0
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp ult i32 %i.05, %k
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/peel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/peel-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,115 @@
+; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=3 -verify-dom-info -simplifycfg -instcombine | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
+
+; Basic loop peeling - check that we can peel off the first 3 loop iterations
+; when explicitly requested.
+; CHECK-LABEL: @basic
+; CHECK: %[[CMP0:.*]] = icmp sgt i32 %k, 0
+; CHECK: br i1 %[[CMP0]], label %[[NEXT0:.*]], label %for.end
+; CHECK: [[NEXT0]]:
+; CHECK: store i32 0, i32* %p, align 4
+; CHECK: %[[CMP1:.*]] = icmp eq i32 %k, 1
+; CHECK: br i1 %[[CMP1]], label %for.end, label %[[NEXT1:[^,]*]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK: [[NEXT1]]:
+; CHECK: %[[INC1:.*]] = getelementptr inbounds i32, i32* %p, i64 1
+; CHECK: store i32 1, i32* %[[INC1]], align 4
+; CHECK: %[[CMP2:.*]] = icmp sgt i32 %k, 2
+; CHECK: br i1 %[[CMP2]], label %[[NEXT2:.*]], label %for.end
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK: [[NEXT2]]:
+; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2
+; CHECK: store i32 2, i32* %[[INC2]], align 4
+; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3
+; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:[^,]*]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK: br i1 %[[CMP4:.*]], label %[[LOOP_PH]], label %for.end, !llvm.loop !{{.*}}
+; CHECK: for.end:
+; CHECK: ret void
+
+define void @basic(i32* %p, i32 %k) #0 {
+entry:
+  %cmp3 = icmp slt i32 0, %k
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1
+  store i32 %i.05, i32* %p.addr.04, align 4
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !llvm.loop !1
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}
+
+!1 = distinct !{!1}
+
+; Make sure peeling works correctly when a value defined in a loop is used
+; in later code - we need to correctly plumb the phi depending on which
+; iteration is actually used.
+; CHECK-LABEL: @output
+; CHECK: %[[CMP0:.*]] = icmp sgt i32 %k, 0
+; CHECK: br i1 %[[CMP0]], label %[[NEXT0:.*]], label %for.end
+; CHECK: [[NEXT0]]:
+; CHECK: store i32 0, i32* %p, align 4
+; CHECK: %[[CMP1:.*]] = icmp eq i32 %k, 1
+; CHECK: br i1 %[[CMP1]], label %for.end, label %[[NEXT1:[^,]*]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK: [[NEXT1]]:
+; CHECK: %[[INC1:.*]] = getelementptr inbounds i32, i32* %p, i64 1
+; CHECK: store i32 1, i32* %[[INC1]], align 4
+; CHECK: %[[CMP2:.*]] = icmp sgt i32 %k, 2
+; CHECK: br i1 %[[CMP2]], label %[[NEXT2:.*]], label %for.end
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK: [[NEXT2]]:
+; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2
+; CHECK: store i32 2, i32* %[[INC2]], align 4
+; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3
+; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:[^,]*]]
+; Verify that MD_loop metadata is dropped.
+; CHECK-NOT:   , !llvm.loop !{{[0-9]*}}
+; CHECK: br i1 %[[CMP4:.*]], label %[[LOOP_PH]], label %for.end, !llvm.loop !{{.*}}
+; CHECK: for.end:
+; CHECK: %ret = phi i32 [ 0, %entry ], [ 1, %[[NEXT0]] ], [ 2, %[[NEXT1]] ], [ 3, %[[NEXT2]] ], [ %inc, %for.body ]
+; CHECK: ret i32 %ret
+define i32 @output(i32* %p, i32 %k) #0 {
+entry:
+  %cmp3 = icmp slt i32 0, %k
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1
+  store i32 %i.05, i32* %p.addr.04, align 4
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !llvm.loop !2
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %ret = phi i32 [ 0, %entry], [ %inc, %for.cond.for.end_crit_edge ]
+  ret i32 %ret
+}
+
+!2 = distinct !{!2}

Added: llvm/trunk/test/Transforms/LoopUnroll/peel-loop2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/peel-loop2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop2.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; RUN: opt -S -loop-unroll -unroll-force-peel-count=1 -verify-dom-info <%s
+
+; Check if loop composed of several BBs is peeled correctly.
+
+declare void @funcb()
+@Comma = external global i8
+define void @funca(i8* readnone %b, i8* readnone %e) { ; walks a pointer from %b up to %e
+entry:
+  %cmp2 = icmp eq i8* %b, %e                      ; empty range: skip the loop entirely
+  br i1 %cmp2, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %for.body.preheader
+  %b.addr.03 = phi i8* [ %incdec.ptr, %for.inc ], [ %b, %for.body.preheader ]
+  %0 = load i8, i8* @Comma
+  %tobool = icmp eq i8 %0, 0
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  tail call void @funcb()
+  store i8 1, i8* @Comma
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body
+  %incdec.ptr = getelementptr inbounds i8, i8* %b.addr.03, i64 1
+  %cmp = icmp eq i8* %incdec.ptr, %e
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
+; CHECK-LABEL: @funca
+
+; Peeled iteration
+; CHECK: %[[REG1:[0-9]+]] = load i8, i8* @Comma
+; CHECK: %[[REG2:.*]] = icmp eq i8 %[[REG1]], 0
+; CHECK: br i1 %[[REG2]], label %{{.*}}, label %[[IFTHEN:.*]]
+; CHECK: [[IFTHEN]]:
+; CHECK: call void @funcb()
+; CHECK: store i8 1, i8* @Comma
+; CHECK: br label %[[FORINC]]
+; CHECK: [[FORINC]]:
+; CHECK: %[[REG3:.*]] = getelementptr inbounds i8, i8* %b, i64 1
+; CHECK: %[[REG4:.*]] = icmp eq i8* %[[REG3]], %e
+; CHECK: br i1 %[[REG4]]
+
+; main body
+; CHECK: %[[REG1b:.*]] = load i8, i8* @Comma
+; CHECK: %[[REG2b:.*]] = icmp eq i8 %[[REG1b]], 0
+; CHECK: br i1 %[[REG2b]], label %{{.*}}, label %[[IFTHENb:.*]]
+; CHECK: [[IFTHENb]]:
+; CHECK: call void @funcb()
+; CHECK: store i8 1, i8* @Comma
+; CHECK: br label %[[FORINCb]]
+; CHECK: [[FORINCb]]:
+; CHECK: %[[REG3b:.*]] = getelementptr inbounds i8, i8* %b, i64 1
+; CHECK: %[[REG4b:.*]] = icmp eq i8* %[[REG3b]], %e
+; CHECK: br i1 %[[REG4b]]

Added: llvm/trunk/test/Transforms/LoopUnroll/pr10813.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/pr10813.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/pr10813.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/pr10813.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -loop-unroll -disable-output
+
+define void @"f_fu___REFUf[]REFUf[]Uf"() nounwind {
+allocas:
+  br i1 undef, label %cif_mask_all, label %cif_mixed_test_all
+
+cif_mask_all:                                     ; preds = %allocas
+  unreachable
+
+cif_mixed_test_all:                               ; preds = %allocas
+  br label %pl_loop.i964
+
+pl_loop.i964:                                     ; preds = %pl_loopend.i973, %cif_mixed_test_all
+  %0 = phi i32 [ %pl_nextlane.i971, %pl_loopend.i973 ], [ 0, %cif_mixed_test_all ] ; lane index: 0, 1, 2, 3, 4
+  br i1 undef, label %pl_dolane.i970, label %pl_loopend.i973
+
+pl_dolane.i970:                                   ; preds = %pl_loop.i964
+  %storeval.i.i969 = extractelement <4 x i8> <i8 0, i8 1, i8 2, i8 3>, i32 %0 ; %0 can be 4 here, one past the last element
+  store i8 %storeval.i.i969, i8* undef, align 1
+  br label %pl_loopend.i973
+
+pl_loopend.i973:                                  ; preds = %pl_dolane.i970, %pl_loop.i964
+  %pl_nextlane.i971 = add i32 %0, 1
+  %exitcond5 = icmp ne i32 %pl_nextlane.i971, 5   ; leave the loop once the next lane index is 5
+  br i1 %exitcond5, label %pl_loop.i964, label %__scatter_base_offsets_i8.exit974
+
+__scatter_base_offsets_i8.exit974:                ; preds = %pl_loopend.i973
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/pr11361.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/pr11361.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/pr11361.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/pr11361.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt -loop-unroll -disable-output < %s
+; PR11361
+
+; This tests for an iterator invalidation issue.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @func_1() nounwind uwtable {
+entry:
+  br label %for.cond8.preheader
+
+for.cond8.preheader:                              ; preds = %for.inc15, %entry
+  %l_1264.04 = phi i32 [ 0, %entry ], [ %add.i, %for.inc15 ]
+  %l_1330.0.03 = phi i80 [ undef, %entry ], [ %ins.lcssa, %for.inc15 ]
+  br label %for.body9
+
+for.body9:                                        ; preds = %for.body9, %for.cond8.preheader
+  %l_1330.0.12 = phi i80 [ %l_1330.0.03, %for.cond8.preheader ], [ %ins, %for.body9 ]
+  %storemerge1 = phi i32 [ 7, %for.cond8.preheader ], [ %sub, %for.body9 ] ; inner counter: starts at 7, counts down to 0
+  %tmp = lshr i80 %l_1330.0.12, 8                 ; extract bits 8..15 of the i80 ...
+  %tmp1 = trunc i80 %tmp to i8
+  %inc12 = add i8 %tmp1, 1                        ; ... add 1 to that byte ...
+  %tmp2 = zext i8 %inc12 to i80
+  %tmp3 = shl nuw nsw i80 %tmp2, 8
+  %mask = and i80 %l_1330.0.12, -65281            ; -65281 == ~0xFF00: clear bits 8..15
+  %ins = or i80 %tmp3, %mask                      ; ... and insert it back
+  %sub = add nsw i32 %storemerge1, -1
+  %tobool = icmp eq i32 %sub, 0
+  br i1 %tobool, label %for.inc15, label %for.body9
+
+for.inc15:                                        ; preds = %for.body9
+  %ins.lcssa = phi i80 [ %ins, %for.body9 ]
+  %sext = shl i32 %l_1264.04, 24                  ; shl 24 + ashr 24: sign-extend the low 8 bits
+  %conv.i = ashr exact i32 %sext, 24
+  %add.i = add nsw i32 %conv.i, 1
+  %cmp = icmp slt i32 %add.i, 3                   ; outer loop continues while %add.i < 3
+  br i1 %cmp, label %for.cond8.preheader, label %for.end16
+
+for.end16:                                        ; preds = %for.inc15
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/pr14167.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/pr14167.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/pr14167.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/pr14167.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @test1() nounwind {
+; Ensure that we don't crash when the trip count == -1.
+; CHECK-LABEL: @test1(
+entry:
+  br label %for.cond2.preheader
+
+for.cond2.preheader:                              ; preds = %for.end, %entry
+  br i1 false, label %middle.block, label %vector.ph
+
+vector.ph:                                        ; preds = %for.cond2.preheader
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  br i1 undef, label %middle.block.loopexit, label %vector.body
+
+middle.block.loopexit:                            ; preds = %vector.body
+  br label %middle.block
+
+middle.block:                                     ; preds = %middle.block.loopexit, %for.cond2.preheader
+  br i1 true, label %for.end, label %scalar.preheader
+
+scalar.preheader:                                 ; preds = %middle.block
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body4, %scalar.preheader
+  %indvars.iv = phi i64 [ 16000, %scalar.preheader ], [ %indvars.iv.next, %for.body4 ] ; starts at 16000
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 16000     ; exits only once the truncated IV wraps back to 16000
+  br i1 %exitcond, label %for.body4, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body4
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %middle.block
+  br i1 undef, label %for.cond2.preheader, label %for.end15
+
+for.end15:                                        ; preds = %for.end
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/pr18861.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/pr18861.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/pr18861.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/pr18861.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,86 @@
+; RUN: opt < %s -loop-unroll -indvars -disable-output
+
+@b = external global i32, align 4
+
+; Test that LoopUnroll does not break LCSSA form.
+;
+; In this function we have the following CFG:
+;            ( entry )
+;                |
+;                v
+;         ( outer.header ) <--
+;                |             \
+;                v              |
+;     --> ( inner.header )      |
+;   /       /          \        |
+;   \      /            \       |
+;    \    v              v     /
+;  ( inner.latch )   ( outer.latch )
+;         |
+;         v
+;     ( exit )
+;
+; When the inner loop is unrolled, the inner.latch block has only one
+; predecessor and one successor, so it can be merged with exit block.
+; During the merge, however, we remove an LCSSA definition for
+; %storemerge1.lcssa, breaking LCSSA form for the outer loop.
+
+; Function Attrs: nounwind uwtable
+define void @fn1() #0 {
+entry:
+  br label %outer.header
+
+outer.header:                                     ; preds = %outer.latch, %entry
+  %storemerge1 = phi i32 [ 0, %entry ], [ %inc9, %outer.latch ] ; outer-loop counter
+  br label %inner.header
+
+inner.header:                                     ; preds = %inner.latch, %outer.header
+  %storemerge = phi i32 [ %add, %inner.latch ], [ 0, %outer.header ] ; inner-loop counter, restarts at 0
+  %cmp = icmp slt i32 %storemerge, 1
+  br i1 %cmp, label %inner.latch, label %outer.latch
+
+inner.latch:                                      ; preds = %inner.header
+  %tobool4 = icmp eq i32 %storemerge, 0
+  %add = add nsw i32 %storemerge, 1
+  br i1 %tobool4, label %inner.header, label %exit
+
+exit:                                             ; preds = %inner.latch
+  %storemerge1.lcssa = phi i32 [ %storemerge1, %inner.latch ] ; LCSSA phi for the outer-loop value %storemerge1
+  store i32 %storemerge1.lcssa, i32* @b, align 4
+  ret void
+
+outer.latch:                                      ; preds = %inner.header
+  %inc9 = add nsw i32 %storemerge1, 1
+  br label %outer.header
+}
+
+; This case is similar to the previous one, and has the same CFG.
+; The difference is that loop unrolling doesn't remove any LCSSA definition,
+; yet breaks LCSSA form for the outer loop. It happens because before unrolling
+; block inner.latch was inside outer loop (and consequently, didn't require
+; LCSSA definition for %x), but after unrolling it occurs out of the outer
+; loop, so we need to insert an LCSSA definition to keep LCSSA.
+
+; Function Attrs: nounwind uwtable
+define void @fn2() {
+entry:
+  br label %outer.header
+
+outer.header:                                     ; preds = %outer.latch, %entry
+  br label %inner.header
+
+inner.header:                                     ; preds = %inner.latch, %outer.header
+  %x = load i32, i32* undef, align 4
+  br i1 true, label %outer.latch, label %inner.latch
+
+inner.latch:                                      ; preds = %inner.header
+  %inc6 = add nsw i32 %x, 1                       ; uses %x, which is defined in %inner.header
+  store i32 %inc6, i32* undef, align 4
+  br i1 false, label %inner.header, label %exit
+
+exit:                                             ; preds = %inner.latch
+  ret void
+
+outer.latch:                                      ; preds = %inner.header
+  br label %outer.header
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/pr27157.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/pr27157.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/pr27157.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/pr27157.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+; RUN: opt -loop-unroll -debug-only=loop-unroll -disable-output < %s
+; REQUIRES: asserts
+; Compile this test with debug flag on to verify domtree right after loop unrolling.
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+
+; PR27157
+define void @foo() {
+entry:
+  br label %loop_header
+loop_header:                                      ; preds = %loop_latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  br i1 undef, label %loop_latch, label %loop_exiting_bb1
+loop_exiting_bb1:                                 ; preds = %loop_header
+  br i1 false, label %loop_exiting_bb2, label %exit1.loopexit
+loop_exiting_bb2:                                 ; preds = %loop_exiting_bb1
+  br i1 false, label %loop_latch, label %bb
+bb:                                               ; preds = %loop_exiting_bb2
+  br label %exit1
+loop_latch:                                       ; preds = %loop_exiting_bb2, %loop_header
+  %iv_next = add nuw nsw i64 %iv, 1
+  %cmp = icmp ne i64 %iv_next, 2                  ; latch exit is taken once %iv_next reaches 2
+  br i1 %cmp, label %loop_header, label %exit2
+exit1.loopexit:                                   ; preds = %loop_exiting_bb1
+  br label %exit1
+exit1:                                            ; preds = %exit1.loopexit, %bb
+  ret void
+exit2:                                            ; preds = %loop_latch
+  ret void
+}
+
+define void @foo2() {
+entry:
+  br label %loop.header
+loop.header:                                      ; preds = %latch, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.inc, %latch ]
+  %iv.inc = add i32 %iv, 1
+  br i1 undef, label %diamond, label %latch
+diamond:                                          ; preds = %loop.header
+  br i1 undef, label %left, label %right
+left:                                             ; preds = %diamond
+  br i1 undef, label %exit, label %merge
+right:                                            ; preds = %diamond
+  br i1 undef, label %exit, label %merge
+merge:                                            ; preds = %right, %left
+  br label %latch
+latch:                                            ; preds = %merge, %loop.header
+  %end.cond = icmp eq i32 %iv, 1                  ; latch exit is taken when %iv == 1
+  br i1 %end.cond, label %exit1, label %loop.header
+exit:                                             ; preds = %right, %left
+  ret void
+exit1:                                            ; preds = %latch
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/pr28132.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/pr28132.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/pr28132.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/pr28132.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,77 @@
+; RUN: opt -loop-unroll -S < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+declare void @fn1(i8*)
+
+declare i1 @fn2(i8*, i8*)
+
+define void @fn4() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %i.05 = phi i8 [ 0, %entry ], [ %inc, %for.inc ]
+  store i8 undef, i8* undef, align 4
+  invoke void @fn1(i8* undef)
+          to label %call.i.noexc unwind label %ehcleanup
+
+call.i.noexc:                                     ; preds = %for.body
+  %call1.i2 = invoke i1 @fn2(i8* undef, i8* undef)
+          to label %call1.i.noexc unwind label %ehcleanup
+
+call1.i.noexc:                                    ; preds = %call.i.noexc
+  br i1 undef, label %if.then.i, label %if.end4.i
+
+if.then.i:                                        ; preds = %call1.i.noexc
+  %tmp1 = load i8, i8* undef, align 4
+  %tobool.i = icmp eq i8 undef, undef
+  br i1 undef, label %if.end4.i, label %if.then2.i
+
+if.then2.i:                                       ; preds = %if.then.i
+  %call3.i3 = invoke i1 @fn2(i8* undef, i8* null)
+          to label %call3.i.noexc unwind label %ehcleanup
+
+call3.i.noexc:                                    ; preds = %if.then2.i
+  br label %if.end4.i
+
+if.end4.i:                                        ; preds = %call3.i.noexc, %if.then.i, %call1.i.noexc
+  %tmp2 = load i8, i8* undef, align 4
+  br label %if.then6.i
+
+if.then6.i:                                       ; preds = %if.end4.i
+  %call7.i4 = invoke i1 @fn2(i8* undef, i8* null)
+          to label %call7.i.noexc unwind label %ehcleanup
+
+call7.i.noexc:                                    ; preds = %if.then6.i
+  br label %fn3
+
+fn3:                                              ; preds = %call7.i.noexc
+  %tmp3 = load i8, i8* undef, align 4
+  %inc.i = add nsw i8 undef, undef
+  store i8 undef, i8* undef, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %fn3
+  %inc = add nsw i8 %i.05, 1
+  %cmp = icmp slt i8 %inc, 6                      ; 6 iterations: %i.05 = 0..5
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.inc
+  invoke void @throw()
+          to label %unreachable unwind label %ehcleanup
+
+ehcleanup:                                        ; preds = %for.end, %if.then6.i, %if.then2.i, %call.i.noexc, %for.body
+  %cp = cleanuppad within none []                 ; single cleanup pad shared by every invoke above
+  cleanupret from %cp unwind to caller
+
+; CHECK: cleanuppad
+; CHECK-NOT: cleanuppad
+
+unreachable:                                      ; preds = %for.end
+  unreachable
+}
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @throw()

Added: llvm/trunk/test/Transforms/LoopUnroll/pr31718.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/pr31718.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/pr31718.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/pr31718.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt -loop-unroll -verify-loop-lcssa -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = external local_unnamed_addr global i32, align 4
+
+; CHECK-LABEL: @main
+; CHECK: exit.loopexit:
+; CHECK: {{.*}} = phi i32 [ %d.0, %h3 ]
+; CHECK: br label %exit
+; CHECK: exit.loopexit1:
+; CHECK: {{.*}} = phi i32 [ %d.0, %h3.1 ]
+; CHECK: br label %exit
+
+define void @main() local_unnamed_addr #0 {
+ph1:
+  br label %h1
+
+h1:                     ; preds = %latch1, %ph1
+  %d.0 = phi i32 [ %1, %latch1 ], [ undef, %ph1 ]
+  br label %ph2
+
+ph2:                    ; preds = %exit2, %h1
+  br label %h2
+
+h2:                     ; preds = %latch2, %ph2
+  %0 = phi i32 [ 0, %ph2 ], [ %inc, %latch2 ]
+  br label %h3
+
+h3:                     ; preds = %latch3, %h2
+  br i1 undef, label %latch3, label %exit
+
+latch3:                 ; preds = %h3
+  br i1 false, label %exit3, label %h3
+
+exit3:                  ; preds = %latch3
+  br label %latch2
+
+latch2:                 ; preds = %exit3
+  %inc = add nuw nsw i32 %0, 1
+  %cmp = icmp slt i32 %inc, 2
+  br i1 %cmp, label %h2, label %exit2
+
+exit2:                  ; preds = %latch2
+  br i1 undef, label %latch1, label %ph2
+
+latch1:                 ; preds = %exit2
+  %1 = load i32, i32* @b, align 4
+  br label %h1
+
+exit:                   ; preds = %h3
+  %d.0.lcssa = phi i32 [ %d.0, %h3 ]
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/pr33437.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/pr33437.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/pr33437.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/pr33437.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-unroll -unroll-peel-count=1 < %s | FileCheck %s
+
+declare zeroext i8 @patatino()
+
+define fastcc void @tinky() {
+; CHECK-LABEL: @tinky(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    br label [[LOOP_PEEL_BEGIN:%.*]]
+; CHECK:       loop.peel.begin:
+; CHECK-NEXT:    br label [[LOOP_PEEL:%.*]]
+; CHECK:       loop.peel:
+; CHECK-NEXT:    [[CALL593_PEEL:%.*]] = tail call zeroext i8 @patatino()
+; CHECK-NEXT:    br i1 false, label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT:%.*]]
+; CHECK:       loop.peel.next:
+; CHECK-NEXT:    br label [[LOOP_PEEL_NEXT1:%.*]]
+; CHECK:       loop.peel.next1:
+; CHECK-NEXT:    br label [[NEXT_PEEL_NEWPH:%.*]]
+; CHECK:       next.peel.newph:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[CALL593:%.*]] = tail call zeroext i8 @patatino()
+; CHECK-NEXT:    br label [[LOOPEXIT]]
+;
+entry:
+  br label %next
+
+loopexit:                                         ; preds = %loop
+  ret void
+
+next:                                             ; preds = %entry
+  br label %loop
+
+loop:                                             ; preds = %loop, %next
+  %a = phi i8 [ undef, %next ], [ %call593, %loop ]
+  %b = phi i32 [ 0, %next ], [ 1, %loop ]
+  %call593 = tail call zeroext i8 @patatino()
+  br i1 false, label %loop, label %loopexit       ; constant-false condition: the backedge is never taken
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/rebuild_lcssa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/rebuild_lcssa.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/rebuild_lcssa.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/rebuild_lcssa.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,190 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This test shows how unrolling an inner loop could break LCSSA for an outer
+; loop, and there is no cheap way to recover it.
+;
+; In this case the inner loop, L3, is being unrolled. It only runs one
+; iteration, so unrolling basically means replacing
+;   br i1 true, label %exit, label %L3_header
+; with
+;   br label %exit
+;
+; However, this change messes up the loop structure: for instance, block
+; L3_body no longer belongs to L2. It becomes an exit block for L2, so LCSSA
+; phis for definitions in L2 should now be placed there. In particular, we need
+; to insert such a definition for %y1.
+
+; CHECK-LABEL: @foo1
+define void @foo1() {
+entry:
+  br label %L1_header
+
+L1_header:                                        ; preds = %L1_latch, %entry
+  br label %L2_header
+
+L2_header:                                        ; preds = %L2_latch, %L1_header
+  %y1 = phi i64 [ undef, %L1_header ], [ %x.lcssa, %L2_latch ]
+  br label %L3_header
+
+L3_header:                                        ; preds = %L3_latch, %L2_header
+  %y2 = phi i64 [ 0, %L3_latch ], [ %y1, %L2_header ]
+  %x = add i64 undef, -1
+  br i1 true, label %L2_latch, label %L3_body
+
+L2_latch:                                         ; preds = %L3_header
+  %x.lcssa = phi i64 [ %x, %L3_header ]
+  br label %L2_header
+
+; CHECK:      L3_body:
+; CHECK-NEXT:   %y1.lcssa = phi i64 [ %y1, %L3_header ]
+L3_body:                                          ; preds = %L3_header
+  store i64 %y1, i64* undef
+  br i1 false, label %L3_latch, label %L1_latch
+
+L3_latch:                                         ; preds = %L3_body
+  br i1 true, label %exit, label %L3_header
+
+L1_latch:                                         ; preds = %L3_body
+  %y.lcssa = phi i64 [ %y2, %L3_body ]
+  br label %L1_header
+
+exit:                                             ; preds = %L3_latch
+  ret void
+}
+
+; Additional tests for some corner cases.
+;
+; CHECK-LABEL: @foo2
+define void @foo2() {
+entry:
+  br label %L1_header
+
+L1_header:                                        ; preds = %L1_latch, %entry
+  br label %L2_header
+
+L2_header:                                        ; preds = %L3_header, %L1_header
+  %a = phi i64 [ undef, %L1_header ], [ %dec_us, %L3_header ]
+  br label %L3_header
+
+L3_header:                                        ; preds = %L3_latch, %L2_header
+  %b = phi i64 [ 0, %L3_latch ], [ %a, %L2_header ]
+  %dec_us = add i64 undef, -1
+  br i1 true, label %L2_header, label %L3_break_to_L1
+
+; CHECK:      L3_break_to_L1:
+; CHECK-NEXT:   %a.lcssa = phi i64 [ %a, %L3_header ]
+L3_break_to_L1:                                   ; preds = %L3_header
+  br i1 false, label %L3_latch, label %L1_latch
+
+L1_latch:                                         ; preds = %L3_break_to_L1
+  %b_lcssa = phi i64 [ %b, %L3_break_to_L1 ]
+  br label %L1_header
+
+L3_latch:                                         ; preds = %L3_break_to_L1
+  br i1 true, label %Exit, label %L3_header
+
+Exit:                                             ; preds = %L3_latch
+  ret void
+}
+
+; CHECK-LABEL: @foo3
+define void @foo3() {
+entry:
+  br label %L1_header
+
+L1_header:                                        ; preds = %L1_latch, %entry
+  %a = phi i8* [ %b, %L1_latch ], [ null, %entry ]
+  br i1 undef, label %L2_header, label %L1_latch
+
+L2_header:                                        ; preds = %L2_latch, %L1_header
+  br i1 undef, label %L2_latch, label %L1_latch
+
+; CHECK:      L2_latch:
+; CHECK-NEXT:   %a.lcssa = phi i8* [ %a, %L2_header ]
+L2_latch:                                         ; preds = %L2_header
+  br i1 true, label %L2_exit, label %L2_header
+
+L1_latch:                                         ; preds = %L2_header, %L1_header
+  %b = phi i8* [ undef, %L1_header ], [ null, %L2_header ]
+  br label %L1_header
+
+L2_exit:                                          ; preds = %L2_latch
+  %a_lcssa1 = phi i8* [ %a, %L2_latch ]
+  br label %Exit
+
+Exit:                                             ; preds = %L2_exit
+  %a_lcssa2 = phi i8* [ %a_lcssa1, %L2_exit ]
+  ret void
+}
+
+; PR26688
+; CHECK-LABEL: @foo4
+define i8 @foo4() {
+entry:
+  br label %L1_header
+
+L1_header:                                        ; preds = %L1_latch, %entry
+  %x = icmp eq i32 1, 0
+  br label %L2_header
+
+L2_header:                                        ; preds = %L3_header, %L1_header
+  br label %L3_header
+
+L3_header:                                        ; preds = %L3_latch, %L2_header
+  br i1 true, label %L2_header, label %L3_exiting
+
+L3_exiting:                                       ; preds = %L3_header
+  br i1 true, label %L3_body, label %L1_latch
+
+; CHECK:      L3_body:
+; CHECK-NEXT:   %x.lcssa = phi i1
+L3_body:                                          ; preds = %L3_exiting
+  br i1 %x, label %L3_latch, label %L3_latch      ; both successors are %L3_latch; %x is defined in %L1_header
+
+L3_latch:                                         ; preds = %L3_body, %L3_body
+  br i1 false, label %L3_header, label %exit
+
+L1_latch:                                         ; preds = %L3_exiting
+  br label %L1_header
+
+exit:                                             ; preds = %L3_latch
+  ret i8 0
+}
+
+; CHECK-LABEL: @foo5
+define void @foo5() {
+entry:
+  br label %outer
+
+outer:                                            ; preds = %outer_latch, %entry
+  br label %inner1
+
+; CHECK: inner1:
+; CHECK-NOT: br i1 true
+; CHECK: br label %inner2_indirect_exit
+inner1:                                           ; preds = %inner1, %outer
+  br i1 true, label %inner2_indirect_exit.preheader, label %inner1
+
+inner2_indirect_exit.preheader:                   ; preds = %inner1
+  br label %inner2_indirect_exit
+
+inner2_indirect_exit:                             ; preds = %inner2_latch, %inner2_indirect_exit.preheader
+  %a = phi i32 [ %b, %inner2_latch ], [ undef, %inner2_indirect_exit.preheader ]
+  indirectbr i8* undef, [label %inner2_latch, label %inner3, label %outer_latch]
+
+inner2_latch:                                     ; preds = %inner2_indirect_exit
+  %b = load i32, i32* undef, align 8
+  br label %inner2_indirect_exit
+
+inner3:                                           ; preds = %inner3, %inner2_indirect_exit
+  %a.lcssa = phi i32 [ %a.lcssa, %inner3 ], [ %a, %inner2_indirect_exit ]
+  br i1 true, label %outer_latch.loopexit, label %inner3
+
+outer_latch.loopexit:                             ; preds = %inner3
+  %a.lcssa.lcssa = phi i32 [ %a.lcssa, %inner3 ]
+  br label %outer_latch
+
+outer_latch:                                      ; preds = %outer_latch.loopexit, %inner2_indirect_exit
+  br label %outer
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/revisit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/revisit.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/revisit.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/revisit.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,156 @@
+; This test checks that nested loops are revisited in various scenarios when
+; unrolling. Note that if we ever start doing outer loop peeling a test case
+; for that should be added here that will look essentially like a hybrid of the
+; current two cases.
+;
+; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \
+; RUN:     -passes='require<opt-remark-emit>,loop(unroll-full)' \
+; RUN:     | FileCheck %s
+;
+; Also run in a special mode that visits children.
+; RUN: opt < %s -disable-output -debug-pass-manager -unroll-revisit-child-loops 2>&1 \
+; RUN:     -passes='require<opt-remark-emit>,loop(unroll-full)' \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-CHILDREN
+
+; Basic test is fully unrolled and we revisit the post-unroll new sibling
+; loops, including the ones that used to be child loops.
+define void @full_unroll(i1* %ptr) {
+; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on full_unroll
+; CHECK-NOT: LoopFullUnrollPass
+
+entry:
+  br label %l0
+
+l0:                                               ; preds = %l0.latch, %entry
+  %cond.0 = load volatile i1, i1* %ptr
+  br i1 %cond.0, label %l0.0.ph, label %exit
+
+l0.0.ph:                                          ; preds = %l0
+  br label %l0.0
+
+l0.0:                                             ; preds = %l0.0.latch, %l0.0.ph
+  %iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
+  %iv.next = add i32 %iv, 1
+  br label %l0.0.0.ph
+
+l0.0.0.ph:                                        ; preds = %l0.0
+  br label %l0.0.0
+
+l0.0.0:                                           ; preds = %l0.0.0, %l0.0.0.ph
+  %cond.0.0.0 = load volatile i1, i1* %ptr
+  br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
+; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
+; CHECK-NOT: LoopFullUnrollPass
+
+l0.0.1.ph:                                        ; preds = %l0.0.0
+  br label %l0.0.1
+
+l0.0.1:                                           ; preds = %l0.0.1, %l0.0.1.ph
+  %cond.0.0.1 = load volatile i1, i1* %ptr
+  br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
+; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
+; CHECK-NOT: LoopFullUnrollPass
+
+l0.0.latch:                                       ; preds = %l0.0.1
+  %cmp = icmp slt i32 %iv.next, 2                 ; %l0.0 runs exactly 2 iterations (%iv = 0, 1)
+  br i1 %cmp, label %l0.0, label %l0.latch
+; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0
+; CHECK-NOT: LoopFullUnrollPass
+;
+; Unrolling occurs, so we visit what were the inner loops twice over. First we
+; visit their clones, and then we visit the original loops re-parented.
+; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1.1<header>
+; CHECK-NOT: LoopFullUnrollPass
+; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.1<header>
+; CHECK-NOT: LoopFullUnrollPass
+; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1<header>
+; CHECK-NOT: LoopFullUnrollPass
+; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0<header>
+; CHECK-NOT: LoopFullUnrollPass
+
+l0.latch:                                         ; preds = %l0.0.latch
+  br label %l0
+; CHECK: LoopFullUnrollPass on Loop at depth 1 containing: %l0<header>
+; CHECK-NOT: LoopFullUnrollPass
+
+exit:                                             ; preds = %l0
+  ret void
+}
+
+; Now we test forced runtime partial unrolling with metadata. Here we end up
+; duplicating child loops without changing their structure and so they aren't by
+; default visited, but will be visited with a special parameter.
+define void @partial_unroll(i32 %count, i1* %ptr) {
+; CHECK-LABEL: FunctionToLoopPassAdaptor{{.*}} on partial_unroll
+; CHECK-NOT: LoopFullUnrollPass
+
+entry:
+  br label %l0
+
+l0:                                               ; preds = %l0.latch, %entry
+  %cond.0 = load volatile i1, i1* %ptr
+  br i1 %cond.0, label %l0.0.ph, label %exit
+
+l0.0.ph:                                          ; preds = %l0
+  br label %l0.0
+
+l0.0:                                             ; preds = %l0.0.latch, %l0.0.ph
+  %iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
+  %iv.next = add i32 %iv, 1
+  br label %l0.0.0.ph
+
+l0.0.0.ph:                                        ; preds = %l0.0
+  br label %l0.0.0
+
+l0.0.0:                                           ; preds = %l0.0.0, %l0.0.0.ph
+  %cond.0.0.0 = load volatile i1, i1* %ptr
+  br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
+; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
+; CHECK-NOT: LoopFullUnrollPass
+
+l0.0.1.ph:                                        ; preds = %l0.0.0
+  br label %l0.0.1
+
+l0.0.1:                                           ; preds = %l0.0.1, %l0.0.1.ph
+  %cond.0.0.1 = load volatile i1, i1* %ptr
+  br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
+; CHECK: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
+; CHECK-NOT: LoopFullUnrollPass
+
+l0.0.latch:                                       ; preds = %l0.0.1
+  %cmp = icmp slt i32 %iv.next, %count            ; trip count depends on the runtime argument %count
+  br i1 %cmp, label %l0.0, label %l0.latch, !llvm.loop !1 ; !1 carries llvm.loop.unroll.count = 2
+; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0
+; CHECK-NOT: LoopFullUnrollPass
+;
+; Partial unrolling occurs which introduces both new child loops and new sibling
+; loops. We only visit the child loops in a special mode, not by default.
+; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0<header>
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass
+; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1<header>
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass
+; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.0.1<header>
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass
+; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 3 containing: %l0.0.1.1<header>
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass
+;
+; When we revisit children, we also revisit the current loop.
+; CHECK-CHILDREN: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0<header>
+; CHECK-CHILDREN-NOT: LoopFullUnrollPass
+;
+; Revisit the children of the outer loop that are part of the epilogue.
+;
+; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.epil<header>
+; CHECK-NOT: LoopFullUnrollPass
+; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1.epil<header>
+; CHECK-NOT: LoopFullUnrollPass
+l0.latch:                                         ; preds = %l0.0.latch
+  br label %l0
+; CHECK: LoopFullUnrollPass on Loop at depth 1 containing: %l0<header>
+; CHECK-NOT: LoopFullUnrollPass
+
+exit:                                             ; preds = %l0
+  ret void
+}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.unroll.count", i32 2}

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,128 @@
+; RUN: opt -loop-unroll -unroll-runtime -unroll-runtime-epilog -S %s | FileCheck %s
+
+; Test that the epilogue is tagged with the same debug information as the original loop body rather than the original loop exit.
+
+; CHECK: for.body.i:
+; CHECK:   br i1 {{.*}}, label %lee1.exit.loopexit.unr-lcssa.loopexit, label %for.body.i, !dbg ![[LOOP_LOC:[0-9]+]]
+; CHECK: lee1.exit.loopexit.unr-lcssa.loopexit:
+; CHECK:   br label %lee1.exit.loopexit.unr-lcssa, !dbg ![[LOOP_LOC]]
+; CHECK: lee1.exit.loopexit.unr-lcssa:
+; CHECK:   %lcmp.mod = icmp ne i32 %xtraiter, 0, !dbg ![[LOOP_LOC]]
+; CHECK:   br i1 %lcmp.mod, label %for.body.i.epil.preheader, label %lee1.exit.loopexit, !dbg ![[LOOP_LOC]]
+; CHECK: for.body.i.epil.preheader:
+; CHECK:   br label %for.body.i.epil, !dbg ![[LOOP_LOC]]
+; CHECK: lee1.exit.loopexit:
+; CHECK:   br label %lee1.exit, !dbg ![[EXIT_LOC:[0-9]+]]
+
+; CHECK-DAG: ![[LOOP_LOC]] = !DILocation(line: 5, column: 3, scope: !{{.*}}, inlinedAt: !{{.*}})
+; CHECK-DAG: ![[EXIT_LOC]] = !DILocation(line: 11, column: 12, scope: !{{.*}}, inlinedAt: !{{.*}})
+
+; Function Attrs: nounwind readnone
+define i32 @goo(i32 %a, i32 %b) local_unnamed_addr #0 !dbg !8 { ; only function in this test; contains the loop %for.body.i
+entry:
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !15), !dbg !16
+  tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !14, metadata !15), !dbg !17
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !18, metadata !15), !dbg !26
+  tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !21, metadata !15), !dbg !28
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29
+  %cmp7.i = icmp eq i32 %b, 0, !dbg !31
+  br i1 %cmp7.i, label %lee1.exit, label %for.body.i.preheader, !dbg !33 ; bypass the loop when %b == 0
+
+for.body.i.preheader:                             ; preds = %entry
+  br label %for.body.i, !dbg !34
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.body.i
+  %i.09.i = phi i32 [ %inc.i, %for.body.i ], [ 0, %for.body.i.preheader ] ; iteration counter, starts at 0
+  %t.08.i = phi i32 [ %add1.i, %for.body.i ], [ 0, %for.body.i.preheader ] ; running total, starts at 0
+  %div.i = sdiv i32 %t.08.i, 2, !dbg !34
+  %add.i = add i32 %t.08.i, %a, !dbg !35
+  %add1.i = add i32 %add.i, %div.i, !dbg !36
+  tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29
+  %inc.i = add nuw i32 %i.09.i, 1, !dbg !37
+  tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29
+  %exitcond.i = icmp eq i32 %inc.i, %b, !dbg !31
+  br i1 %exitcond.i, label %lee1.exit.loopexit, label %for.body.i, !dbg !33, !llvm.loop !38 ; leave the loop once %inc.i == %b
+
+lee1.exit.loopexit:                               ; preds = %for.body.i
+  %add1.i.lcssa = phi i32 [ %add1.i, %for.body.i ]
+  br label %lee1.exit, !dbg !41 ; !41 is the line 11, column 12 location (the loop-exit location)
+
+lee1.exit:                                        ; preds = %lee1.exit.loopexit, %entry
+  %t.0.lcssa.i = phi i32 [ 0, %entry ], [ %add1.i.lcssa, %lee1.exit.loopexit ] ; 0 when the loop was bypassed
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !44, metadata !15), !dbg !47
+  tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !45, metadata !15), !dbg !48
+  %add.i4 = add nsw i32 %b, %a, !dbg !41
+  %sub.i = sub nsw i32 %a, %b, !dbg !49
+  %mul.i = mul nsw i32 %add.i4, %sub.i, !dbg !50
+  %add = add nsw i32 %t.0.lcssa.i, %mul.i, !dbg !51
+  ret i32 %add, !dbg !52
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+neon,+strict-align,+vfp3,-crypto,-d16,-fp-armv8,-fp-only-sp,-fp16,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "t.c", directory: "/prj/llvm-arm/scratch1/zhaoshiz/bugs/debug-symbol")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 1, !"min_enum_size", i32 4}
+!7 = !{!"Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)"}
+!8 = distinct !DISubprogram(name: "goo", scope: !1, file: !1, line: 23, type: !9, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11, !11, !11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !{!13, !14}
+!13 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 23, type: !11)
+!14 = !DILocalVariable(name: "b", arg: 2, scope: !8, file: !1, line: 23, type: !11)
+!15 = !DIExpression()
+!16 = !DILocation(line: 23, column: 14, scope: !8)
+!17 = !DILocation(line: 23, column: 21, scope: !8)
+!18 = !DILocalVariable(name: "a", arg: 1, scope: !19, file: !1, line: 3, type: !11)
+!19 = distinct !DISubprogram(name: "lee1", scope: !1, file: !1, line: 3, type: !9, isLocal: true, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !20)
+!20 = !{!18, !21, !22, !23}
+!21 = !DILocalVariable(name: "b", arg: 2, scope: !19, file: !1, line: 3, type: !11)
+!22 = !DILocalVariable(name: "t", scope: !19, file: !1, line: 4, type: !11)
+!23 = !DILocalVariable(name: "i", scope: !24, file: !1, line: 5, type: !25)
+!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 5, column: 3)
+!25 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!26 = !DILocation(line: 3, column: 22, scope: !19, inlinedAt: !27)
+!27 = distinct !DILocation(line: 24, column: 27, scope: !8)
+!28 = !DILocation(line: 3, column: 29, scope: !19, inlinedAt: !27)
+!29 = !DILocation(line: 4, column: 7, scope: !19, inlinedAt: !27)
+!30 = !DILocation(line: 5, column: 17, scope: !24, inlinedAt: !27)
+!31 = !DILocation(line: 5, column: 23, scope: !32, inlinedAt: !27)
+!32 = distinct !DILexicalBlock(scope: !24, file: !1, line: 5, column: 3)
+!33 = !DILocation(line: 5, column: 3, scope: !24, inlinedAt: !27)
+!34 = !DILocation(line: 6, column: 13, scope: !32, inlinedAt: !27)
+!35 = !DILocation(line: 6, column: 11, scope: !32, inlinedAt: !27)
+!36 = !DILocation(line: 6, column: 7, scope: !32, inlinedAt: !27)
+!37 = !DILocation(line: 5, column: 28, scope: !32, inlinedAt: !27)
+!38 = distinct !{!38, !39, !40}
+!39 = !DILocation(line: 5, column: 3, scope: !24)
+!40 = !DILocation(line: 6, column: 14, scope: !24)
+!41 = !DILocation(line: 11, column: 12, scope: !42, inlinedAt: !46)
+!42 = distinct !DISubprogram(name: "lee2", scope: !1, file: !1, line: 10, type: !9, isLocal: true, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !43)
+!43 = !{!44, !45}
+!44 = !DILocalVariable(name: "a", arg: 1, scope: !42, file: !1, line: 10, type: !11)
+!45 = !DILocalVariable(name: "b", arg: 2, scope: !42, file: !1, line: 10, type: !11)
+!46 = distinct !DILocation(line: 24, column: 40, scope: !8)
+!47 = !DILocation(line: 10, column: 22, scope: !42, inlinedAt: !46)
+!48 = !DILocation(line: 10, column: 29, scope: !42, inlinedAt: !46)
+!49 = !DILocation(line: 11, column: 20, scope: !42, inlinedAt: !46)
+!50 = !DILocation(line: 11, column: 16, scope: !42, inlinedAt: !46)
+!51 = !DILocation(line: 24, column: 38, scope: !8)
+!52 = !DILocation(line: 24, column: 3, scope: !8)

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-li.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-li.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-li.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-li.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt -S -loop-unroll -unroll-runtime -unroll-count=2 -verify-loop-info -pass-remarks=loop-unroll < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Verify that runtime-unrolling a top-level loop that has nested loops does not
+; make the unroller produce invalid loop-info.
+; CHECK: remark: {{.*}}: unrolled loop by a factor of 2 with run-time trip count
+; CHECK: @widget
+; CHECK: ret void
+define void @widget(double* %arg, double* %arg1, double* %p, i64* %q1, i64* %q2) local_unnamed_addr { ; outer loop (header.outer..latch.outer) wrapping an inner loop
+entry:
+  br label %header.outer
+
+header.outer:                                     ; preds = %latch.outer, %entry
+  %tmp = phi double* [ %tmp8, %latch.outer ], [ %arg, %entry ] ; outer induction pointer, starts at %arg
+  br label %header.inner
+
+header.inner:                                     ; preds = %latch.inner, %header.outer
+  br i1 undef, label %latch.inner, label %latch.outer ; inner-loop exit condition is undef
+
+latch.inner:                                      ; preds = %header.inner
+  %tmp5 = load i64, i64* %q1, align 8
+  store i64 %tmp5, i64* %q2, align 8
+  %tmp6 = icmp eq double* %p, %arg ; %tmp6 has no users in this function
+  br label %header.inner
+
+latch.outer:                                      ; preds = %header.inner
+  store double 0.0, double* %p, align 8
+  %tmp8 = getelementptr inbounds double, double* %tmp, i64 1 ; advance the outer pointer by one double
+  %tmp9 = icmp eq double* %tmp8, %arg1
+  br i1 %tmp9, label %exit, label %header.outer ; outer loop ends when %tmp8 == %arg1
+
+exit:                                             ; preds = %latch.outer
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,275 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false -unroll-runtime-multi-exit=true -unroll-count=4  -verify-dom-info -S | FileCheck %s
+
+; REQUIRES: asserts
+; The tests below are for verifying dom tree after runtime unrolling
+; with multiple exit/exiting blocks.
+
+; We explicitly set the unroll count so that expensiveTripCount computation is allowed.
+
+; mergedexit block has edges from loop exit blocks.
+define i64 @test1() {
+; CHECK-LABEL: test1(
+; CHECK-LABEL: headerexit:
+; CHECK-NEXT:    %addphi = phi i64 [ %add.iv, %header ], [ %add.iv.1, %header.1 ], [ %add.iv.2, %header.2 ], [ %add.iv.3, %header.3 ]
+; CHECK-NEXT:    br label %mergedexit
+; CHECK-LABEL: latchexit:
+; CHECK-NEXT:    %shftphi = phi i64 [ %shft, %latch ], [ %shft.1, %latch.1 ], [ %shft.2, %latch.2 ], [ %shft.3, %latch.3 ]
+; CHECK-NEXT:    br label %mergedexit
+; CHECK-LABEL: mergedexit:
+; CHECK-NEXT:    %retval = phi i64 [ %addphi, %headerexit ], [ %shftphi, %latchexit ]
+; CHECK-NEXT:    ret i64 %retval
+entry:
+  br label %preheader
+
+preheader:                                              ; preds = %entry
+  %trip = zext i32 undef to i64 ; loop bound is undef in this test
+  br label %header
+
+header:                                              ; preds = %latch, %preheader
+  %iv = phi i64 [ 2, %preheader ], [ %add.iv, %latch ]
+  %add.iv = add nuw nsw i64 %iv, 2
+  %cmp1 = icmp ult i64 %add.iv, %trip
+  br i1 %cmp1, label %latch, label %headerexit ; first exiting edge (from the header)
+
+latch:                                             ; preds = %header
+  %shft = ashr i64 %add.iv, 1
+  %cmp2 = icmp ult i64 %shft, %trip
+  br i1 %cmp2, label %header, label %latchexit ; second exiting edge (from the latch)
+
+headerexit:                                              ; preds = %header
+  %addphi = phi i64 [ %add.iv, %header ]
+  br label %mergedexit
+
+latchexit:                                              ; preds = %latch
+ %shftphi = phi i64 [ %shft, %latch ]
+  br label %mergedexit
+
+mergedexit:                                              ; preds = %latchexit, %headerexit
+  %retval = phi i64 [ %addphi, %headerexit ], [ %shftphi, %latchexit ] ; merges the values from both exit blocks
+  ret i64 %retval
+}
+
+; mergedexit has edges from loop exit blocks and a block outside the loop.
+define  void @test2(i1 %cond, i32 %n) {
+; CHECK-LABEL: header.1:
+; CHECK-NEXT:    %add.iv.1 = add nuw nsw i64 %add.iv, 2
+; CHECK:         br i1 %cmp1.1, label %latch.1, label %headerexit
+; CHECK-LABEL: latch.3:
+; CHECK:         %cmp2.3 = icmp ult i64 %shft.3, %trip
+; CHECK-NEXT:    br i1 %cmp2.3, label %header, label %latchexit, !llvm.loop
+entry:
+  br i1 %cond, label %preheader, label %mergedexit ; false edge reaches %mergedexit without entering the loop
+
+preheader:                                              ; preds = %entry
+  %trip = zext i32 %n to i64
+  br label %header
+
+header:                                              ; preds = %latch, %preheader
+  %iv = phi i64 [ 2, %preheader ], [ %add.iv, %latch ]
+  %add.iv = add nuw nsw i64 %iv, 2
+  %cmp1 = icmp ult i64 %add.iv, %trip
+  br i1 %cmp1, label %latch, label %headerexit ; exiting edge from the header
+
+latch:                                             ; preds = %header
+  %shft = ashr i64 %add.iv, 1
+  %cmp2 = icmp ult i64 %shft, %trip
+  br i1 %cmp2, label %header, label %latchexit ; exiting edge from the latch
+
+headerexit:                                              ; preds = %header
+  br label %mergedexit
+
+latchexit:                                              ; preds = %latch
+  br label %mergedexit
+
+mergedexit:                                              ; preds = %latchexit, %headerexit, %entry
+  ret void
+}
+
+
+; exitsucc is from loop exit block only.
+define i64 @test3(i32 %n) {
+; CHECK-LABEL: test3(
+; CHECK-LABEL:  headerexit:
+; CHECK-NEXT:     br label %exitsucc
+; CHECK-LABEL:  latchexit:
+; CHECK-NEXT:     %shftphi = phi i64 [ %shft, %latch ], [ %shft.1, %latch.1 ], [ %shft.2, %latch.2 ], [ %shft.3, %latch.3 ]
+; CHECK-NEXT:     ret i64 %shftphi
+; CHECK-LABEL:  exitsucc:
+; CHECK-NEXT:     ret i64 96
+entry:
+  br label %preheader
+
+preheader:                                              ; preds = %entry
+  %trip = zext i32 %n to i64
+  br label %header
+
+header:                                              ; preds = %latch, %preheader
+  %iv = phi i64 [ 2, %preheader ], [ %add.iv, %latch ]
+  %add.iv = add nuw nsw i64 %iv, 2
+  %cmp1 = icmp ult i64 %add.iv, %trip
+  br i1 %cmp1, label %latch, label %headerexit ; exiting edge from the header
+
+latch:                                             ; preds = %header
+  %shft = ashr i64 %add.iv, 1
+  %cmp2 = icmp ult i64 %shft, %trip
+  br i1 %cmp2, label %header, label %latchexit ; exiting edge from the latch
+
+headerexit:                                              ; preds = %header
+  br label %exitsucc ; %exitsucc's only predecessor is this exit block
+
+latchexit:                                              ; preds = %latch
+  %shftphi = phi i64 [ %shft, %latch ]
+  ret i64 %shftphi
+
+exitsucc:                                              ; preds = %headerexit
+  ret i64 96
+}
+
+; exit block (%default) has an exiting block and another exit block as predecessors.
+define void @test4(i16 %c3) {
+; CHECK-LABEL: test4
+
+; CHECK-LABEL: exiting.prol:
+; CHECK-NEXT:   switch i16 %c3, label %default.loopexit.loopexit1 [
+
+; CHECK-LABEL: exiting:
+; CHECK-NEXT:   switch i16 %c3, label %default.loopexit.loopexit [
+
+; CHECK-LABEL: default.loopexit.loopexit:
+; CHECK-NEXT:   br label %default.loopexit
+
+; CHECK-LABEL: default.loopexit.loopexit1:
+; CHECK-NEXT:   br label %default.loopexit
+
+; CHECK-LABEL: default.loopexit:
+; CHECK-NEXT:   br label %default
+preheader:
+  %c1 = zext i32 undef to i64 ; loop bound is undef in this test
+  br label %header
+
+header:                                       ; preds = %latch, %preheader
+  %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %latch ]
+  br label %exiting
+
+exiting:                                           ; preds = %header
+  switch i16 %c3, label %default [ ; default destination leaves the loop directly
+    i16 45, label %otherexit ; leaves the loop via %otherexit, which also reaches %default
+    i16 95, label %latch ; the only case that stays in the loop
+  ]
+
+latch:                                          ; preds = %exiting
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %c2 = icmp ult i64 %indvars.iv.next, %c1
+  br i1 %c2, label %header, label %latchexit
+
+latchexit:                                          ; preds = %latch
+  ret void
+
+default:                                          ; preds = %otherexit, %exiting
+  ret void
+
+otherexit:                                           ; preds = %exiting
+  br label %default
+}
+
+; exit block (%exitB) has an exiting block and another exit block as predecessors.
+; exiting block comes from inner loop.
+define void @test5() {
+; CHECK-LABEL: test5
+; CHECK-LABEL: bb1:
+; CHECK-NEXT:   br i1 false, label %outerH.prol.preheader, label %outerH.prol.loopexit
+
+; CHECK-LABEL: outerH.prol.preheader:
+; CHECK-NEXT:   br label %outerH.prol
+
+; CHECK-LABEL: outerH.prol:
+; CHECK-NEXT:   %tmp4.prol = phi i32 [ %tmp6.prol, %outerLatch.prol ], [ undef, %outerH.prol.preheader ]
+; CHECK-NEXT:   %prol.iter = phi i32 [ 0, %outerH.prol.preheader ], [ %prol.iter.sub, %outerLatch.prol ]
+; CHECK-NEXT:   br label %innerH.prol
+bb:
+  %tmp = icmp sgt i32 undef, 79
+  br i1 %tmp, label %outerLatchExit, label %bb1
+
+bb1:                                              ; preds = %bb
+  br label %outerH
+
+outerH:                                              ; preds = %outerLatch, %bb1
+  %tmp4 = phi i32 [ %tmp6, %outerLatch ], [ undef, %bb1 ] ; outer counter, initial value is undef
+  br label %innerH
+
+innerH:                                              ; preds = %innerLatch, %outerH
+  br i1 undef, label %innerexiting, label %otherexitB ; %otherexitB is an exit block that branches on to %exitB
+
+innerexiting:                                             ; preds = %innerH
+  br i1 undef, label %innerLatch, label %exitB ; inner-loop block exiting directly to %exitB
+
+innerLatch:                                             ; preds = %innerexiting
+  %tmp13 = fcmp olt double undef, 2.000000e+00
+  br i1 %tmp13, label %innerH, label %outerLatch
+
+outerLatch:                                              ; preds = %innerLatch
+  %tmp6 = add i32 %tmp4, 1
+  %tmp7 = icmp sgt i32 %tmp6, 79
+  br i1 %tmp7, label %outerLatchExit, label %outerH ; outer loop ends once %tmp6 > 79
+
+outerLatchExit:                                              ; preds = %outerLatch, %bb
+  ret void
+
+exitB:                                             ; preds = %innerexiting, %otherexitB
+  ret void
+
+otherexitB:                                              ; preds = %innerH
+  br label %exitB
+
+}
+
+; Blocks reachable from exits (not_zero44) have the IDom as the block within the loop (Header).
+; Update the IDom to the preheader.
+define void @test6() {
+; CHECK-LABEL: test6
+; CHECK-LABEL: header.prol.preheader:
+; CHECK-NEXT:    br label %header.prol
+
+; CHECK-LABEL: header.prol:
+; CHECK-NEXT:    %indvars.iv.prol = phi i64 [ undef, %header.prol.preheader ], [ %indvars.iv.next.prol, %latch.prol ]
+; CHECK-NEXT:    %prol.iter = phi i64 [ 1, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
+; CHECK-NEXT:    br i1 false, label %latch.prol, label %otherexit.loopexit1
+
+; CHECK-LABEL: header.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.prol, %latch.prol ]
+; CHECK-NEXT:    br label %header.prol.loopexit
+
+; CHECK-LABEL: header.prol.loopexit:
+; CHECK-NEXT:    %indvars.iv.unr = phi i64 [ undef, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ]
+; CHECK-NEXT:    br i1 true, label %latchexit, label %entry.new
+
+; CHECK-LABEL: entry.new:
+; CHECK-NEXT:    br label %header
+entry:
+  br label %header
+
+header:                                          ; preds = %latch, %entry
+  %indvars.iv = phi i64 [ undef, %entry ], [ %indvars.iv.next, %latch ] ; IV starts from undef
+  br i1 undef, label %latch, label %otherexit
+
+latch:                                         ; preds = %header
+  %indvars.iv.next = add nsw i64 %indvars.iv, 2
+  %0 = icmp slt i64 %indvars.iv.next, 616
+  br i1 %0, label %header, label %latchexit ; loop continues while the incremented IV is < 616
+
+latchexit:                                          ; preds = %latch
+  br label %latchexitsucc
+
+otherexit:                                 ; preds = %header
+  br label %otherexitsucc
+
+otherexitsucc:                                          ; preds = %otherexit
+  br label %not_zero44
+
+not_zero44:                                       ; preds = %latchexitsucc, %otherexitsucc
+  unreachable
+
+latchexitsucc:                                      ; preds = %latchexit
+  br label %not_zero44
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,646 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-loop-lcssa -verify-dom-info -verify-loop-info -S | FileCheck %s -check-prefix=EPILOG-NO-IC
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-loop-lcssa -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-loop-lcssa -verify-dom-info -verify-loop-info -instcombine
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false -unroll-runtime-multi-exit=true -verify-loop-lcssa -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s -check-prefix=PROLOG
+; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-runtime-epilog=false -unroll-count=2 -unroll-runtime-multi-exit=true -verify-loop-lcssa -verify-dom-info -verify-loop-info -instcombine
+
+; REQUIRES: asserts
+
+; The third and fifth RUN lines (the ones with -unroll-count=2) generate a single
+; epilog/prolog remainder block, not a remainder loop, for all the test cases below.
+
+; test with three exiting and three exit blocks.
+; none of the exit blocks have successors
+define void @test1(i64 %trip, i1 %cond) {
+; EPILOG: test1(
+; EPILOG-NEXT:  entry:
+; EPILOG-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; EPILOG-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; EPILOG-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; EPILOG-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
+; EPILOG:       entry.new:
+; EPILOG-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]]
+; EPILOG-NEXT:    br label [[LOOP_HEADER:%.*]]
+; EPILOG:  loop_latch.epil:
+; EPILOG-NEXT:     %epil.iter.sub = add i64 %epil.iter, -1
+; EPILOG-NEXT:     %epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0
+; EPILOG-NEXT:     br i1 %epil.iter.cmp, label %exit2.loopexit.epilog-lcssa, label %loop_header.epil
+; EPILOG:  loop_latch.7:
+; EPILOG-NEXT:     %niter.nsub.7 = add i64 %niter, -8
+; EPILOG-NEXT:     %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:     br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+
+; PROLOG: test1(
+; PROLOG-NEXT:  entry:
+; PROLOG-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; PROLOG-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; PROLOG-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; PROLOG-NEXT:    br i1 [[TMP1]], label %loop_header.prol.loopexit, label %loop_header.prol.preheader
+; PROLOG:       loop_header.prol:
+; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ [[XTRAITER]], %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    br i1 %cond, label %loop_latch.prol, label %loop_exiting_bb1.prol
+; PROLOG:       loop_latch.prol:
+; PROLOG-NEXT:    %iv_next.prol = add i64 %iv.prol, 1
+; PROLOG-NEXT:    %prol.iter.sub = add i64 %prol.iter, -1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol.loopexit.unr-lcssa, label %loop_header.prol
+; PROLOG:  loop_latch.7:
+; PROLOG-NEXT:     %iv_next.7 = add i64 %iv, 8
+; PROLOG-NEXT:     %cmp.7 = icmp eq i64 %iv_next.7, %trip
+; PROLOG-NEXT:     br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header
+entry:
+  br label %loop_header
+
+loop_header:                                      ; preds = %loop_latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  br i1 %cond, label %loop_latch, label %loop_exiting_bb1
+
+loop_exiting_bb1:                                 ; preds = %loop_header
+  br i1 false, label %loop_exiting_bb2, label %exit1 ; constant-false condition: always takes the %exit1 edge
+
+loop_exiting_bb2:                                 ; preds = %loop_exiting_bb1
+  br i1 false, label %loop_latch, label %exit3 ; constant-false condition: always takes the %exit3 edge
+
+exit3:                                            ; preds = %loop_exiting_bb2
+  ret void
+
+loop_latch:                                       ; preds = %loop_exiting_bb2, %loop_header
+  %iv_next = add i64 %iv, 1
+  %cmp = icmp ne i64 %iv_next, %trip
+  br i1 %cmp, label %loop_header, label %exit2.loopexit ; latch exit once %iv_next == %trip
+
+exit1:                                            ; preds = %loop_exiting_bb1
+ ret void
+
+exit2.loopexit:                                   ; preds = %loop_latch
+  ret void
+}
+
+
+; test with three exiting and two exit blocks.
+; The non-latch exit block has 2 unique predecessors.
+; There are 2 values passed to the exit blocks that are calculated at every iteration.
+; %sum.02 and %add. Both of these are incoming values for phi from every exiting
+; unrolled block.
+define i32 @test2(i32* nocapture %a, i64 %n) {
+; EPILOG: test2(
+; EPILOG: for.exit2.loopexit:
+; EPILOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG: for.exit2.loopexit2:
+; EPILOG-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG: for.exit2:
+; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; EPILOG-NEXT:    ret i32 %retval
+; EPILOG: %niter.nsub.7 = add i64 %niter, -8
+
+; PROLOG: test2(
+; PROLOG: for.exit2.loopexit:
+; PROLOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG: for.exit2.loopexit1:
+; PROLOG-NEXT:    %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG: for.exit2:
+; PROLOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
+; PROLOG-NEXT:    ret i32 %retval
+; PROLOG: %indvars.iv.next.7 = add i64 %indvars.iv, 8
+
+entry:
+  br label %header
+
+header:                                           ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] ; running sum, starts at 0
+  br i1 false, label %for.exit2, label %for.exiting_block ; constant-false condition: always continues to %for.exiting_block
+
+for.exiting_block:                                ; preds = %header
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %for.exit2, label %for.body ; exits to %for.exit2 when %n == 42
+
+for.body:                                         ; preds = %for.exiting_block
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.end, label %header ; latch exit once the next index equals %n
+
+for.end:                                          ; preds = %for.body
+  %sum.0.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+
+for.exit2:                                        ; preds = %for.exiting_block, %header
+  %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
+  ret i32 %retval
+}
+
+; test with two exiting and three exit blocks.
+; the non-latch exiting block has a switch.
+define void @test3(i64 %trip, i64 %add) {
+; EPILOG: test3(
+; EPILOG-NEXT:  entry:
+; EPILOG-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; EPILOG-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; EPILOG-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; EPILOG-NEXT:    br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
+; EPILOG:       entry.new:
+; EPILOG-NEXT:    %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]]
+; EPILOG-NEXT:    br label [[LOOP_HEADER:%.*]]
+; EPILOG:  loop_header:
+; EPILOG-NEXT:     %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
+; EPILOG-NEXT:     %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG:  loop_exiting_bb1.7:
+; EPILOG-NEXT:     switch i64 %sum.next.6, label %loop_latch.7
+; EPILOG:  loop_latch.7:
+; EPILOG-NEXT:     %sum.next.7 = add i64 %sum.next.6, %add
+; EPILOG-NEXT:     %niter.nsub.7 = add i64 %niter, -8
+; EPILOG-NEXT:     %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:     br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
+
+; PROLOG:  test3(
+; PROLOG-NEXT:  entry:
+; PROLOG-NEXT:    [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
+; PROLOG-NEXT:    [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
+; PROLOG-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; PROLOG-NEXT:    br i1 [[TMP1]], label %loop_header.prol.loopexit, label %loop_header.prol.preheader
+; PROLOG:  loop_header:
+; PROLOG-NEXT:     %iv = phi i64 [ %iv.unr, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
+; PROLOG-NEXT:     %sum = phi i64 [ %sum.unr, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
+; PROLOG:  loop_exiting_bb1.7:
+; PROLOG-NEXT:     switch i64 %sum.next.6, label %loop_latch.7
+; PROLOG:  loop_latch.7:
+; PROLOG-NEXT:     %iv_next.7 = add nsw i64 %iv, 8
+; PROLOG-NEXT:     %sum.next.7 = add i64 %sum.next.6, %add
+; PROLOG-NEXT:     %cmp.7 = icmp eq i64 %iv_next.7, %trip
+; PROLOG-NEXT:     br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header
+entry:
+  br label %loop_header
+
+loop_header:                                      ; preds = %loop_latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
+  br i1 undef, label %loop_latch, label %loop_exiting_bb1
+
+loop_exiting_bb1:                                 ; preds = %loop_header
+   switch i64 %sum, label %loop_latch [ ; default destination stays in the loop
+     i64 24, label %exit1 ; leaves the loop
+     i64 42, label %exit3 ; leaves the loop
+   ]
+
+exit3:                                            ; preds = %loop_exiting_bb1
+  ret void
+
+loop_latch:                                       ; preds = %loop_exiting_bb1, %loop_header
+  %iv_next = add nuw nsw i64 %iv, 1
+  %sum.next = add i64 %sum, %add
+  %cmp = icmp ne i64 %iv_next, %trip
+  br i1 %cmp, label %loop_header, label %exit2.loopexit ; latch exit once %iv_next == %trip
+
+exit1:                                            ; preds = %loop_exiting_bb1
+ ret void
+
+exit2.loopexit:                                   ; preds = %loop_latch
+  ret void
+}
+
+; FIXME: Support multiple exiting blocks to the same latch exit block.
+; Three exiting blocks where header and latch exit to same LatchExit.
+define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
+; EPILOG: hdr_latch_same_exit(
+; EPILOG-NOT: .unr
+; EPILOG-NOT: .epil
+
+; PROLOG: hdr_latch_same_exit(
+; PROLOG-NOT: .unr
+; PROLOG-NOT: .prol
+entry:
+  br label %header
+
+header:                                        ; preds = %latch, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br i1 %cond, label %latchExit, label %for.exiting_block ; header exits to the same block as the latch
+
+for.exiting_block:                             ; preds = %header
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %for.exit2, label %latch
+
+latch:                                         ; preds = %for.exiting_block
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latchExit, label %header
+
+latchExit:                                          ; preds = %latch, %header
+  %result = phi i32 [ 0, %header ], [ %add, %latch ]
+  ret i32 %result
+
+for.exit2:                                     ; preds = %for.exiting_block
+  ret i32 42
+}
+
+; Two exiting blocks to latch where the exiting blocks are Latch and a
+; non-header
+; FIXME: We should unroll this loop.
+define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
+; EPILOG: otherblock_latch_same_exit(
+; EPILOG-NOT: .unr
+; EPILOG-NOT: .epil
+
+; PROLOG: otherblock_latch_same_exit(
+; PROLOG-NOT: .unr
+; PROLOG-NOT: .prol
+entry:
+  br label %header
+
+header:                                        ; preds = %latch, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br i1 %cond, label %for.exit2, label %for.exiting_block   ; header exits to its own exit block
+
+for.exiting_block:                             ; preds = %header
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %latchExit, label %latch   ; non-header block exits to the latch's exit block
+
+latch:                                         ; preds = %for.exiting_block
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latchExit, label %header   ; latch exit and backedge
+
+latchExit:                                          ; preds = %latch, %for.exiting_block
+  %result = phi i32 [ 2, %for.exiting_block ], [ %add, %latch ]   ; constant incoming value from the early exit
+  ret i32 %result
+
+for.exit2:                                     ; preds = %header
+  ret i32 42
+}
+
+; Two exiting blocks to latch where the exiting blocks are Latch and a
+; non-header
+; Same as above test except the incoming value for latch Phi is from the header
+; FIXME: We should be able to runtime unroll.
+define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
+; EPILOG: otherblock_latch_same_exit2(
+; EPILOG-NOT: .unr
+; EPILOG-NOT: .epil
+
+; PROLOG: otherblock_latch_same_exit2(
+; PROLOG-NOT: .unr
+; PROLOG-NOT: .prol
+entry:
+  br label %header
+
+header:                                        ; preds = %latch, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br i1 %cond, label %for.exit2, label %for.exiting_block   ; header exits to its own exit block
+
+for.exiting_block:                             ; preds = %header
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %latchExit, label %latch   ; non-header block exits to the latch's exit block
+
+latch:                                         ; preds = %for.exiting_block
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latchExit, label %header   ; latch exit and backedge
+
+latchExit:                                          ; preds = %latch, %for.exiting_block
+  %result = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %latch ]   ; early-exit value is the header phi %sum.02
+  ret i32 %result
+
+for.exit2:                                     ; preds = %header
+  ret i32 42
+}
+
+; Two exiting blocks to latch where the exiting blocks are Latch and a
+; non-header
+; Same as above test except the incoming value for cloned latch Phi is from the
+; for.exiting_block.
+; FIXME: We should be able to runtime unroll.
+define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
+; EPILOG: otherblock_latch_same_exit3(
+; EPILOG-NOT: .unr
+; EPILOG-NOT: .epil
+
+; PROLOG: otherblock_latch_same_exit3(
+; PROLOG-NOT: .unr
+; PROLOG-NOT: .prol
+entry:
+  br label %header
+
+header:                                        ; preds = %latch, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br i1 %cond, label %for.exit2, label %for.exiting_block   ; header exits to its own exit block
+
+for.exiting_block:                             ; preds = %header
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02               ; here %add is computed in the exiting block, not in the latch
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %latchExit, label %latch   ; non-header block exits to the latch's exit block
+
+latch:                                         ; preds = %for.exiting_block
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latchExit, label %header   ; latch exit and backedge
+
+latchExit:                                          ; preds = %latch, %for.exiting_block
+  %result = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %latch ]
+  ret i32 %result
+
+for.exit2:                                     ; preds = %header
+  ret i32 42
+}
+
+; FIXME: Support multiple exiting blocks to the unique exit block (LatchExit).
+; Only 2 blocks in loop: header and latch where both exit to same LatchExit.
+define void @unique_exit(i32 %arg) {
+; EPILOG: unique_exit(
+; EPILOG-NOT: .unr
+; EPILOG-NOT: .epil
+
+; PROLOG: unique_exit(
+; PROLOG-NOT: .unr
+; PROLOG-NOT: .prol
+entry:
+  %tmp = icmp sgt i32 undef, %arg
+  br i1 %tmp, label %preheader, label %returnblock   ; loop guard
+
+preheader:                                 ; preds = %entry
+  br label %header
+
+header:                                           ; preds = %preheader, %latch
+  %tmp4 = phi i32 [ %inc, %latch ], [ %arg, %preheader ]
+  %inc = add nsw i32 %tmp4, 1
+  br i1 true, label %latchExit, label %latch   ; header exit, written with a constant-true condition
+
+latch:                                            ; preds = %header
+  %cmp = icmp slt i32 %inc, undef
+  br i1 %cmp, label %header, label %latchExit   ; latch exit goes to the same block as the header exit
+
+latchExit:                                ; preds = %header, %latch
+  %tmp2.ph = phi i32 [ %tmp4, %header ], [ -1, %latch ]
+  br label %returnblock
+
+returnblock:                                         ; preds = %latchExit, %entry
+  %tmp2 = phi i32 [ -1, %entry ], [ %tmp2.ph, %latchExit ]   ; value is not used by the ret below
+  ret void
+}
+
+; two exiting and two exit blocks.
+; the non-latch exiting block has duplicate edges to the non-latch exit block.
+define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
+; EPILOG: test5(
+; EPILOG:   exit1.loopexit:
+; EPILOG-NEXT:      %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
+; EPILOG-NEXT:      br label %exit1
+; EPILOG:   exit1.loopexit2:
+; EPILOG-NEXT:      %ivy.epil = add i64 %iv.epil, %add
+; EPILOG-NEXT:      br label %exit1
+; EPILOG:   exit1:
+; EPILOG-NEXT:      %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ]
+; EPILOG-NEXT:      ret i64 %result
+; EPILOG:   loop_latch.7:
+; EPILOG:      %niter.nsub.7 = add i64 %niter, -8
+
+; PROLOG: test5(
+; PROLOG:   exit1.loopexit:
+; PROLOG-NEXT:      %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
+; PROLOG-NEXT:      br label %exit1
+; PROLOG:   exit1.loopexit1:
+; PROLOG-NEXT:      %ivy.prol = add i64 %iv.prol, %add
+; PROLOG-NEXT:      br label %exit1
+; PROLOG:   exit1:
+; PROLOG-NEXT:      %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.prol, %exit1.loopexit1 ]
+; PROLOG-NEXT:      ret i64 %result
+; PROLOG:   loop_latch.7:
+; PROLOG:      %iv_next.7 = add nsw i64 %iv, 8
+entry:
+  br label %loop_header
+
+loop_header:                                   ; preds = %loop_latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
+  br i1 %cond, label %loop_latch, label %loop_exiting
+
+loop_exiting:                                  ; preds = %loop_header
+   %ivy = add i64 %iv, %add
+   switch i64 %sum, label %loop_latch [
+     i64 24, label %exit1
+     i64 42, label %exit1
+   ]
+                                               ; both switch cases above target %exit1: two edges from one block
+loop_latch:                                    ; preds = %loop_exiting, %loop_header
+  %iv_next = add nuw nsw i64 %iv, 1
+  %sum.next = add i64 %sum, %add
+  %cmp = icmp ne i64 %iv_next, %trip
+  br i1 %cmp, label %loop_header, label %latchexit
+
+exit1:                                         ; preds = %loop_exiting, %loop_exiting
+ %result = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ]   ; one incoming entry per duplicate edge
+ ret i64 %result
+
+latchexit:                                     ; preds = %loop_latch
+  ret i64 %sum.next
+}
+
+; test when exit blocks have successors.
+define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
+; EPILOG: test6(
+; EPILOG:   for.exit2.loopexit:
+; EPILOG-NEXT:      %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
+; EPILOG-NEXT:      br label %for.exit2
+; EPILOG:   for.exit2.loopexit2:
+; EPILOG-NEXT:      %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; EPILOG-NEXT:      br label %for.exit2
+; EPILOG:   for.exit2:
+; EPILOG-NEXT:      %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; EPILOG-NEXT:      br i1 %cond, label %exit_true, label %exit_false
+; EPILOG:   latch.7:
+; EPILOG:           %niter.nsub.7 = add i64 %niter, -8
+
+; PROLOG: test6(
+; PROLOG:   for.exit2.loopexit:
+; PROLOG-NEXT:      %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
+; PROLOG-NEXT:      br label %for.exit2
+; PROLOG:   for.exit2.loopexit1:
+; PROLOG-NEXT:      %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
+; PROLOG-NEXT:      br label %for.exit2
+; PROLOG:   for.exit2:
+; PROLOG-NEXT:      %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
+; PROLOG-NEXT:      br i1 %cond, label %exit_true, label %exit_false
+; PROLOG: latch.7:
+; PROLOG:   %indvars.iv.next.7 = add i64 %indvars.iv, 8
+entry:
+  br label %header
+
+header:                                        ; preds = %latch, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br i1 false, label %for.exit2, label %for.exiting_block   ; header exit edge, written with a constant-false condition
+
+for.exiting_block:                             ; preds = %header
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %for.exit2, label %latch
+
+latch:                                         ; preds = %for.exiting_block
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %load = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %load, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latch_exit, label %header
+
+latch_exit:                                    ; preds = %latch
+  %sum.0.lcssa = phi i32 [ %add, %latch ]
+  ret i32 %sum.0.lcssa
+
+for.exit2:                                     ; preds = %for.exiting_block, %header
+  %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
+  %addx = add i32 %retval, %x
+  br i1 %cond, label %exit_true, label %exit_false   ; this exit block has successors instead of returning
+
+exit_true:                                     ; preds = %for.exit2
+  ret i32 %retval
+
+exit_false:                                    ; preds = %for.exit2
+  ret i32 %addx
+}
+
+; test when value in exit block does not have VMap.
+define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
+; EPILOG-NO-IC: test7(
+; EPILOG-NO-IC: loopexit1.loopexit:
+; EPILOG-NO-IC-NEXT:  %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ]
+; EPILOG-NO-IC-NEXT:  br label %loopexit1
+; EPILOG-NO-IC: loopexit1.loopexit1:
+; EPILOG-NO-IC-NEXT:  %sext3.ph2 = phi i32 [ %shft, %header.epil ]
+; EPILOG-NO-IC-NEXT:  br label %loopexit1
+; EPILOG-NO-IC: loopexit1:
+; EPILOG-NO-IC-NEXT:   %sext3 = phi i32 [ %sext3.ph, %loopexit1.loopexit ], [ %sext3.ph2, %loopexit1.loopexit1 ]
+bb:
+  %tmp = icmp slt i32 undef, 2
+  %sext = sext i32 undef to i64
+  %shft = ashr exact i32 %arg, 16                       ; defined before the loop; feeds the exit phi %sext3
+  br i1 %tmp, label %loopexit2, label %preheader
+
+preheader:                                              ; preds = %bb
+  br label %header
+
+header:                                              ; preds = %latch, %preheader
+  %tmp6 = phi i64 [ 1, %preheader ], [ %add, %latch ]
+  br i1 false, label %loopexit1, label %latch   ; header exit edge, written with a constant-false condition
+
+latch:                                              ; preds = %header
+  %add = add nuw nsw i64 %tmp6, 1
+  %tmp9 = icmp slt i64 %add, %sext
+  br i1 %tmp9, label %header, label %latchexit
+
+latchexit:                                             ; preds = %latch
+  unreachable
+
+loopexit2:                                             ; preds = %bb
+ ret i32 %shft
+
+loopexit1:                                             ; preds = %header
+  %sext3 = phi i32 [ %shft, %header ]
+  ret i32 %sext3
+}
+
+; Nested loop where the inner loop is unrolled.
+; FIXME: we cannot unroll with epilog remainder currently, because
+; the outer loop does not contain the epilog preheader and epilog exit (while
+; in fact it should). This causes us to choke on LCSSA form being incorrect in
+; the outer loop. However, the exit block where LCSSA fails is in fact still
+; within the outer loop. For now, we just bail out when an outer loop is present
+; and an epilog loop would be generated.
+; The outer loop header is the preheader for the inner loop and the inner header
+; branches back to the outer loop.
+define void @test8() {
+; EPILOG: test8(
+; EPILOG-NOT: niter
+
+; PROLOG: test8(
+; PROLOG: outerloop:
+; PROLOG-NEXT: phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit ]
+; PROLOG:      %lcmp.mod = icmp eq i64
+; PROLOG-NEXT: br i1 %lcmp.mod, label %innerH.prol.loopexit, label %innerH.prol.preheader
+; PROLOG: latch.6:
+; PROLOG-NEXT: %tmp4.7 = add nsw i64 %tmp3, 8
+; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7
+; PROLOG: latch.7
+; PROLOG-NEXT: %tmp6.7 = icmp ult i64 %tmp4.7, 100
+; PROLOG-NEXT: br i1 %tmp6.7, label %innerH, label %exit.unr-lcssa
+bb:
+  br label %outerloop
+
+outerloop:                                              ; preds = %innerH, %bb
+  %tmp = phi i64 [ 3, %bb ], [ 0, %innerH ]
+  br label %innerH                                      ; outer header falls straight into the inner header
+
+innerH:                                              ; preds = %latch, %outerloop
+  %tmp3 = phi i64 [ %tmp4, %latch ], [ %tmp, %outerloop ]
+  %tmp4 = add nuw nsw i64 %tmp3, 1
+  br i1 false, label %outerloop, label %latch   ; inner-loop exit edge that is also the outer backedge
+
+latch:                                              ; preds = %innerH
+  %tmp6 = icmp ult i64 %tmp4, 100
+  br i1 %tmp6, label %innerH, label %exit   ; inner backedge; the other edge leaves both loops
+
+exit:                                              ; preds = %latch
+  ret void
+}
+
+declare i8 addrspace(1)* @foo(i32)
+; inner loop prolog unrolled
+; a value from outer loop is used in exit block of inner loop.
+; Don't create VMap entries for such values (%trip).
+define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
+; PROLOG: test9(
+; PROLOG: header.prol:
+; PROLOG-NEXT: %phi.prol = phi i64 [ 0, %header.prol.preheader ], [ %iv.next.prol, %latch.prol ]
+; PROLOG: latch.prol:
+; PROLOG-NOT: trip
+; PROLOG:     br i1 %prol.iter.cmp, label %header.prol.loopexit.unr-lcssa, label %header.prol
+bb:
+  br label %outerloopHdr
+
+outerloopHdr:                                              ; preds = %outerLatch, %bb
+  %trip = add i32 %n, -1                                   ; computed in the outer loop, outside the inner loop
+  %outercnd = icmp slt i32 0, %trip
+  br i1 %outercnd, label %preheader, label %outerLatch
+
+preheader:                                              ; preds = %outerloopHdr
+  %tmp4 = zext i32 0 to i64
+  br label %header
+
+header:                                              ; preds = %latch, %preheader
+  %phi = phi i64 [ %tmp4, %preheader ], [ %iv.next, %latch ]
+  %tmp7 = trunc i64 %phi to i32
+  br i1 true, label %latch, label %innerexit   ; header exit edge, written with a constant-true condition
+
+innerexit:                                              ; preds = %header
+  %tmp9 = call i8 addrspace(1)* @foo(i32 %trip)         ; exit block of the inner loop uses the outer-loop value %trip
+  ret i8 addrspace(1)* %tmp9
+
+latch:                                             ; preds = %header
+  %tmp11 = add nsw i32 %tmp7, 1
+  %innercnd = icmp slt i32 %tmp11, %trip
+  %iv.next = add nuw nsw i64 %phi, 1
+  br i1 %innercnd, label %header, label %outerLatch
+
+outerLatch:                                             ; preds = %latch, %outerloopHdr
+  br label %outerloopHdr                                ; unconditional outer backedge
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-non-exiting-latch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-non-exiting-latch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-non-exiting-latch.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-non-exiting-latch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; REQUIRES: asserts
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-allow-remainder=true -unroll-count=4
+
+; Make sure that the runtime unroll does not break with a non-exiting latch.
+define i32 @test(i32* %a, i32* %b, i32* %c, i64 %n) {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %while.body ]
+  %cmp = icmp slt i64 %i.0, %n
+  br i1 %cmp, label %while.body, label %while.end   ; the only loop exit is here, in the header
+
+while.body:                                       ; preds = %while.cond
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.0
+  %0 = load i32, i32* %arrayidx
+  %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 %i.0
+  %1 = load i32, i32* %arrayidx1
+  %mul = mul nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.0
+  store i32 %mul, i32* %arrayidx2                 ; a[i] = b[i] * c[i]
+  %inc = add nsw i64 %i.0, 1
+  br label %while.cond                            ; latch: unconditional backedge, no exit edge
+
+while.end:                                        ; preds = %while.cond
+  ret i32 0
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,285 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefixes=EPILOG,COMMON
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
+;
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefixes=EPILOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
+;
+; Restricted versions of unroll (unroll<peeling;no-runtime>, unroll-full) should not be doing runtime unrolling
+; even if it is globally enabled through the -unroll-runtime option.
+;
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefixes=NOEPILOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefixes=NOEPILOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Tests for unrolling loops with run-time trip counts
+
+; COMMON-LABEL: @test(
+
+; EPILOG: %xtraiter = and i32 %n
+; EPILOG:  %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG:  br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
+
+; NOEPILOG-NOT: %xtraiter = and i32 %n
+
+; PROLOG: %xtraiter = and i32 %n
+; PROLOG:  %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG:  br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+
+; NOPROLOG-NOT: %xtraiter = and i32 %n
+
+; EPILOG: for.body.epil:
+; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ],  [ %indvars.iv.unr, %for.body.epil.preheader ]
+; EPILOG:  %epil.iter.sub = sub i32 %epil.iter, 1
+; EPILOG:  %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
+; EPILOG:  br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
+
+; NOEPILOG: for.body:
+; NOEPILOG-NOT: for.body.epil:
+
+; PROLOG: for.body.prol:
+; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
+; PROLOG:  %prol.iter.sub = sub i32 %prol.iter, 1
+; PROLOG:  %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
+; PROLOG:  br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
+
+; NOPROLOG: for.body:
+; NOPROLOG-NOT: for.body.prol:
+
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body   ; skip the loop entirely when %n is 0
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02                  ; running sum of the loaded elements
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n        ; bound is the argument %n, not a constant
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+
+; Still try to completely unroll loops with compile-time trip counts
+; even if the -unroll-runtime is specified
+
+; COMMON-LABEL: @test1(
+; COMMON: for.body:
+; COMMON-NOT: for.body.epil:
+; COMMON-NOT: for.body.prol:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.01                  ; running sum of the loaded elements
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 5         ; bound is the literal 5, unlike the %n bound in @test
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
+
+; This is test 2007-05-09-UnknownTripCount.ll which can be unrolled now
+; if the -unroll-runtime option is turned on
+
+; COMMON-LABEL: @foo(
+; EPILOG: bb72.2:
+; PROLOG: bb72.2:
+; NOEPILOG-NOT: bb72.2:
+; NOPROLOG-NOT: bb72.2:
+
+define void @foo(i32 %trips) {
+entry:
+        br label %cond_true.outer
+
+cond_true.outer:                                  ; preds = %bb72, %entry
+        %indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]
+        br label %bb72
+
+bb72:                                             ; preds = %cond_true.outer
+        %indvar.next2 = add i32 %indvar1.ph, 1
+        %exitcond3 = icmp eq i32 %indvar.next2, %trips   ; bound is the argument %trips
+        br i1 %exitcond3, label %cond_true138, label %cond_true.outer
+
+cond_true138:                                     ; preds = %bb72
+        ret void
+}
+
+
+; Test run-time unrolling for a loop that counts down in steps of 2.
+
+; COMMON-LABEL: @down(
+; EPILOG: for.body.epil:
+; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
+
+; NOEPILOG: for.body:
+; NOEPILOG-NOT: for.body.epil:
+
+; PROLOG: for.body.prol:
+; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
+
+; NOPROLOG: for.body:
+; NOPROLOG-NOT: for.body.prol:
+
+define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
+entry:
+  %cmp2 = icmp eq i32 %len, 0
+  br i1 %cmp2, label %for.end, label %for.body   ; skip the loop entirely when %len is 0
+
+for.body:                                         ; preds = %for.body, %entry
+  %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ]
+  %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
+  %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1   ; pointer advances by one i16 per iteration
+  %0 = load i16, i16* %p.addr.05, align 2
+  %conv = zext i16 %0 to i32
+  %add = add i32 %conv, %res.03
+  %sub = add nsw i32 %len.addr.04, -2             ; remaining length shrinks by 2 per iteration
+  %cmp = icmp eq i32 %sub, 0
+  br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %phitmp = trunc i32 %add to i16
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i16 %res.0.lcssa
+}
+
+; Test run-time unrolling disable metadata.
+; COMMON-LABEL: @test2(
+
+; EPILOG: for.body:
+; EPILOG-NOT: for.body.epil:
+
+; NOEPILOG: for.body:
+; NOEPILOG-NOT: for.body.epil:
+
+; PROLOG: for.body:
+; PROLOG-NOT: for.body.prol:
+
+; NOPROLOG: for.body:
+; NOPROLOG-NOT: for.body.prol:
+
+define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
+entry:
+  %cmp2 = icmp eq i32 %len, 0
+  br i1 %cmp2, label %for.end, label %for.body   ; skip the loop entirely when %len is 0
+
+for.body:                                         ; preds = %for.body, %entry
+  %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ]
+  %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
+  %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1
+  %0 = load i16, i16* %p.addr.05, align 2
+  %conv = zext i16 %0 to i32
+  %add = add i32 %conv, %res.03
+  %sub = add nsw i32 %len.addr.04, -2             ; remaining length shrinks by 2 per iteration
+  %cmp = icmp eq i32 %sub, 0
+  br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body, !llvm.loop !0   ; !0 holds llvm.loop.unroll.runtime.disable
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %phitmp = trunc i32 %add to i16
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i16 %res.0.lcssa
+}
+
+; Don't unroll a loop with multiple exit/exiting blocks, unless
+; -runtime-unroll-multi-exit=true is given.
+; Single exit, multiple exiting blocks.
+define void @unique_exit(i32 %arg) {
+; COMMON-LABEL: @unique_exit(
+; COMMON-NOT: .unr
+
+entry:
+  %tmp = icmp sgt i32 undef, %arg
+  br i1 %tmp, label %preheader, label %returnblock   ; loop guard
+
+preheader:                                 ; preds = %entry
+  br label %header
+
+LoopExit:                                ; preds = %header, %latch
+  %tmp2.ph = phi i32 [ %tmp4, %header ], [ -1, %latch ]   ; single exit block shared by both exiting blocks
+  br label %returnblock
+
+returnblock:                                         ; preds = %LoopExit, %entry
+  %tmp2 = phi i32 [ -1, %entry ], [ %tmp2.ph, %LoopExit ]   ; value is not used by the ret below
+  ret void
+
+header:                                           ; preds = %preheader, %latch
+  %tmp4 = phi i32 [ %inc, %latch ], [ %arg, %preheader ]
+  %inc = add nsw i32 %tmp4, 1
+  br i1 true, label %LoopExit, label %latch   ; header exit, written with a constant-true condition
+
+latch:                                            ; preds = %header
+  %cmp = icmp slt i32 %inc, undef
+  br i1 %cmp, label %header, label %LoopExit   ; latch exit to the same block
+}
+
+; multiple exit blocks. don't unroll
+define void @multi_exit(i64 %trip, i1 %cond) {
+; COMMON-LABEL: @multi_exit(
+; COMMON-NOT: .unr
+
+entry:
+  br label %loop_header
+
+loop_header:                                   ; preds = %loop_latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  br i1 %cond, label %loop_latch, label %loop_exiting_bb1
+
+loop_exiting_bb1:                              ; preds = %loop_header
+  br i1 false, label %loop_exiting_bb2, label %exit1   ; exit edge to %exit1
+
+loop_exiting_bb2:                              ; preds = %loop_exiting_bb1
+  br i1 false, label %loop_latch, label %exit3   ; exit edge to %exit3
+
+exit3:                                         ; preds = %loop_exiting_bb2
+  ret void
+
+loop_latch:                                    ; preds = %loop_exiting_bb2, %loop_header
+  %iv_next = add i64 %iv, 1
+  %cmp = icmp ne i64 %iv_next, %trip
+  br i1 %cmp, label %loop_header, label %exit2.loopexit   ; backedge and third exit edge
+
+exit1:                                         ; preds = %loop_exiting_bb1
+ ret void
+
+exit2.loopexit:                                ; preds = %loop_latch
+  ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.runtime.disable"}
+
+; need to use LABEL here to separate function IR matching from metadata matching
+; COMMON-LABEL: {{^}}!0 =
+
+; EPILOG-SAME: distinct !{!0, !1}
+; EPILOG: !1 = !{!"llvm.loop.unroll.disable"}
+
+; PROLOG-SAME: distinct !{!0, !1}
+; PROLOG: !1 = !{!"llvm.loop.unroll.disable"}

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,77 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+; This tests that setting the unroll count works
+
+
+; EPILOG: for.body.preheader:
+; EPILOG:   br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg [[PH_LOC:![0-9]+]]
+; EPILOG: for.body:
+; EPILOG:   br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
+; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil.preheader:
+; EPILOG:   br label %for.body.epil, !dbg [[BODY_LOC]]
+; EPILOG: for.body.epil:
+; EPILOG:   br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC]]
+; EPILOG: for.end.loopexit:
+; EPILOG:   br label %for.end, !dbg [[EXIT_LOC:![0-9]+]]
+
+; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
+; EPILOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
+; EPILOG-DAG: [[EXIT_LOC]] = !DILocation(line: 103, column: 1, scope: !{{.*}})
+
+; PROLOG: for.body.preheader:
+; PROLOG:   br {{.*}} label %for.body.prol.preheader, label %for.body.prol.loopexit, !dbg [[PH_LOC:![0-9]+]]
+; PROLOG: for.body.prol:
+; PROLOG:   br label %for.body.prol.loopexit, !dbg [[BODY_LOC:![0-9]+]]
+; PROLOG: for.body.prol.loopexit:
+; PROLOG:   br {{.*}} label %for.end.loopexit, label %for.body.preheader.new, !dbg [[PH_LOC]]
+; PROLOG: for.body:
+; PROLOG:   br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[BODY_LOC]]
+; PROLOG-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+
+; PROLOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
+; PROLOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly !dbg !6 {
+entry:
+  %cmp1 = icmp eq i32 %n, 0, !dbg !7
+  br i1 %cmp1, label %for.end, label %for.body, !dbg !7   ; !7 is source line 100
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv, !dbg !8
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !8
+  %add = add nsw i32 %0, %sum.02, !dbg !8   ; !8 is source line 101
+  %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !9
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9
+  %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !9
+  br i1 %exitcond, label %for.end, label %for.body, !dbg !9   ; !9 is source line 102
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa, !dbg !10   ; !10 is source line 103
+}
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.dbg.cu = !{!11}
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !{i32 1, !"PIC Level", i32 2}
+
+!3 = !{}
+!4 = !DISubroutineType(types: !3)
+!5 = !DIFile(filename: "test.cpp", directory: "/tmp")
+!6 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, unit: !11, retainedNodes: !3)
+!7 = !DILocation(line: 100, column: 1, scope: !6)
+!8 = !DILocation(line: 101, column: 1, scope: !6)
+!9 = !DILocation(line: 102, column: 1, scope: !6)
+!10 = !DILocation(line: 103, column: 1, scope: !6)
+!11 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+                             file: !5,
+                             isOptimized: true, flags: "-O2",
+                             splitDebugFilename: "abc.debug", emissionKind: 2) 

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop2.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true  -unroll-count=8 | FileCheck %s  -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=true  -unroll-count=8 | FileCheck %s  -check-prefix=EPILOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-threshold=25 -unroll-partial-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+; Choose a smaller, power-of-two, unroll count if the loop is too large.
+; This test makes sure we're not unrolling 'odd' counts
+
+; EPILOG: for.body:
+; EPILOG: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
+; EPILOG-NOT: br i1 %niter.ncmp.4, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil:
+
+; PROLOG: for.body.prol:
+; PROLOG: for.body:
+; PROLOG: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; PROLOG-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:                                            ; guard: bypass the loop when %n == 0
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]  ; running sum of the elements loaded so far
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 index so it can be compared with i32 %n
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]  ; 0 when the loop was bypassed, else the final sum
+  ret i32 %sum.0.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop3.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop3.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop3.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; REQUIRES: asserts
+; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-partial-threshold=200 -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
+; RUN: opt < %s -disable-output -stats -passes='require<opt-remark-emit>,unroll' -unroll-runtime -unroll-partial-threshold=200 -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
+
+; Test that nested loops can be unrolled.  We need to increase the thresholds to do it.
+
+; STATS: 2 loop-unroll - Number of loops unrolled (completely or otherwise)
+
+define i32 @nested(i32* nocapture %a, i32 %n, i32 %m) nounwind uwtable readonly {
+entry:                                            ; guard: return 0 unless %n > 0 (signed)
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.cond1.preheader.lr.ph, label %for.end7
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %cmp28 = icmp sgt i32 %m, 0  ; inner-loop guard, computed once outside both loops
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc5, %for.cond1.preheader.lr.ph
+  %indvars.iv16 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next17, %for.inc5 ]  ; outer index
+  %sum.012 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %sum.1.lcssa, %for.inc5 ]  ; sum carried across outer iterations
+  br i1 %cmp28, label %for.body3, label %for.inc5
+
+for.body3:                                        ; preds = %for.cond1.preheader, %for.body3
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.cond1.preheader ]  ; inner index
+  %sum.19 = phi i32 [ %add4, %for.body3 ], [ %sum.012, %for.cond1.preheader ]
+  %0 = add nsw i64 %indvars.iv, %indvars.iv16  ; element index = inner index + outer index
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %0
+  %1 = load i32, i32* %arrayidx, align 4
+  %add4 = add nsw i32 %1, %sum.19
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %m  ; inner loop ends when the truncated next index equals %m
+  br i1 %exitcond, label %for.inc5, label %for.body3
+
+for.inc5:                                         ; preds = %for.body3, %for.cond1.preheader
+  %sum.1.lcssa = phi i32 [ %sum.012, %for.cond1.preheader ], [ %add4, %for.body3 ]  ; unchanged sum if the inner loop was skipped
+  %indvars.iv.next17 = add i64 %indvars.iv16, 1
+  %lftr.wideiv18 = trunc i64 %indvars.iv.next17 to i32
+  %exitcond19 = icmp eq i32 %lftr.wideiv18, %n  ; outer loop ends when the truncated next index equals %n
+  br i1 %exitcond19, label %for.end7, label %for.cond1.preheader
+
+for.end7:                                         ; preds = %for.inc5, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.inc5 ]
+  ret i32 %sum.0.lcssa
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop4.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; RUN: opt < %s -S -O2 -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -O2 -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+; Check runtime unrolling prologue can be promoted by LICM pass.
+
+; EPILOG: entry:
+; EPILOG: %xtraiter
+; EPILOG: %lcmp.mod
+; EPILOG: loop1:
+; EPILOG: br i1 %lcmp.mod
+; EPILOG: loop2.epil:
+
+; PROLOG: entry:
+; PROLOG: %xtraiter
+; PROLOG: %lcmp.mod
+; PROLOG: loop1:
+; PROLOG: br i1 %lcmp.mod
+; PROLOG: loop2.prol:
+
+define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind {
+entry:
+  br label %loop1
+
+loop1:                                            ; outer loop header; %iv1 starts at 0
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+  %offset1 = getelementptr i32, i32* %addr1, i32 %iv1
+  store i32 %iv1, i32* %offset1, align 4  ; addr1[iv1] = iv1
+  br label %loop2.header
+
+loop2.header:                                     ; inner-loop guard: enter %loop2 only when %iter >= 1 (unsigned)
+  %e = icmp uge i32 %iter, 1  ; depends only on the argument %iter, not on any loop value
+  br i1 %e, label %loop2, label %exit2
+
+loop2:                                            ; inner loop; %iv2 starts at 0 on every entry from %loop2.header
+  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+  %offset2 = getelementptr i32, i32* %addr2, i32 %iv2
+  store i32 %iv2, i32* %offset2, align 4  ; addr2[iv2] = iv2
+  %inc2 = add i32 %iv2, 1
+  %exitcnd2 = icmp uge i32 %inc2, %iter  ; leave the inner loop once %inc2 >= %iter (unsigned)
+  br i1 %exitcnd2, label %exit2, label %loop2
+
+exit2:
+  br label %loop1.latch
+
+loop1.latch:                                      ; outer latch
+  %inc1 = add i32 %iv1, 1
+  %exitcnd1 = icmp uge i32 %inc1, 1024  ; leave the outer loop once %inc1 >= 1024 (unsigned)
+  br i1 %exitcnd1, label %exit, label %loop1
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
+
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-count=16 | FileCheck --check-prefix=UNROLL-16 %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-count=4 | FileCheck --check-prefix=UNROLL-4 %s
+
+; Given that the trip-count of this loop is a 3-bit value, we cannot
+; safely unroll it with a count of anything more than 8.
+
+define i3 @test(i3* %a, i3 %n) {
+; UNROLL-16-LABEL: @test(
+; UNROLL-4-LABEL: @test(
+entry:                                            ; guard: bypass the loop when %n == 0
+  %cmp1 = icmp eq i3 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+; UNROLL-16-LABEL: for.body:
+; UNROLL-4-LABEL: for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i3 [ %add, %for.body ], [ 0, %entry ]  ; running 3-bit sum of the loaded elements
+  %arrayidx = getelementptr inbounds i3, i3* %a, i64 %indvars.iv  ; NOTE(review): the "-LABEL-NOT" suffix used just below is not a documented FileCheck directive; confirm that line is actually enforced
+
+; UNROLL-16-LABEL: for.body
+; UNROLL-16-LABEL: getelementptr
+; UNROLL-16-LABEL-NOT: getelementptr
+
+; UNROLL-4-LABEL: getelementptr
+; UNROLL-4-LABEL: getelementptr
+; UNROLL-4-LABEL: getelementptr
+; UNROLL-4-LABEL: getelementptr
+
+  %0 = load i3, i3* %arrayidx
+  %add = add nsw i3 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i3  ; the exit test only sees the low 3 bits of the i64 index
+  %exitcond = icmp eq i3 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+; UNROLL-16-LABEL: for.end
+; UNROLL-4-LABEL: for.end
+
+; UNROLL-16-NOT: for.body.epil:
+; UNROLL-4: for.body.epil:
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ]  ; 0 when the loop was bypassed, else the final sum
+  ret i3 %sum.0.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,94 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -verify-dom-info -unroll-runtime-multi-exit=false -verify-loop-info -S | FileCheck %s -check-prefix=NOUNROLL
+
+; This tests when unrolling of a multiple-exit loop occurs by default (i.e. without specifying -unroll-runtime-multi-exit).
+
+; the second exit block is a deopt block. The loop has one exiting block other than the latch.
+define i32 @test1(i32* nocapture %a, i64 %n) {
+; CHECK-LABEL: test1(
+; CHECK-LABEL:  header.epil:
+; CHECK-NEXT:     %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
+; CHECK-LABEL:  otherexit.loopexit:
+; CHECK-NEXT:     %sum.02.lcssa.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ],
+; CHECK-NEXT:     br label %otherexit
+; CHECK-LABEL:  otherexit.loopexit3:
+; CHECK-NEXT:     br label %otherexit
+; CHECK-LABEL:  otherexit:
+; CHECK-NEXT:     %sum.02.lcssa = phi i32 [ %sum.02.lcssa.ph, %otherexit.loopexit ], [ %sum.02.epil, %otherexit.loopexit3 ]
+; CHECK-NEXT:     %rval = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02.lcssa) ]
+; CHECK-NEXT:     ret i32 %rval
+; CHECK-LABEL:  latch.7:
+; CHECK:          add i64 %indvars.iv, 8
+
+; NOUNROLL: test1(
+; NOUNROLL-NOT: .epil
+; NOUNROLL-NOT: .prol
+; NOUNROLL:   otherexit:
+; NOUNROLL-NEXT:   %sum.02.lcssa = phi i32 [ %sum.02, %for.exiting_block ]
+; NOUNROLL-NEXT:   %rval = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02.lcssa) ] 
+entry:
+  br label %header
+
+header:                                           ; loop header: index and running sum
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br label %for.exiting_block
+
+for.exiting_block:                                ; non-latch exit: leaves to %otherexit when %n == 42
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %otherexit, label %latch
+
+latch:                                            ; adds a[iv] to the sum; leaves to %latchexit when iv + 1 == %n
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latchexit, label %header
+
+latchexit:                                          ; preds = %latch
+  %sum.0.lcssa = phi i32 [ %add, %latch ]
+  ret i32 %sum.0.lcssa
+
+otherexit:                                        ; passes %sum.02 in a "deopt" operand bundle and returns the call's result
+  %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02) ]
+  ret i32 %rval
+}
+
+; the exit block is not a deopt block.
+define i32 @test2(i32* nocapture %a, i64 %n) {
+; CHECK-LABEL: test2(
+; CHECK-NOT: .epil
+; CHECK-NOT: .prol
+; CHECK-LABEL: otherexit:
+; CHECK-NEXT:    ret i32 %sum.02
+
+entry:
+  br label %header
+
+header:                                           ; loop header: index and running sum
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
+  br label %for.exiting_block
+
+for.exiting_block:                                ; non-latch exit: leaves to %otherexit when %n == 42
+ %cmp = icmp eq i64 %n, 42
+ br i1 %cmp, label %otherexit, label %latch
+
+latch:                                            ; adds a[iv] to the sum; leaves to %latchexit when iv + 1 == %n
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %latchexit, label %header
+
+latchexit:                                          ; preds = %latch
+  %sum.0.lcssa = phi i32 [ %add, %latch ]
+  ret i32 %sum.0.lcssa
+
+otherexit:                                        ; plain return of %sum.02 via a single-input phi; no deoptimize call in this block
+  %rval = phi i32 [%sum.02, %for.exiting_block ]
+  ret i32 %rval
+}
+declare i32 @llvm.experimental.deoptimize.i32(...)

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,74 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 -unroll-remainder -instcombine | FileCheck %s
+
+; CHECK-LABEL: unroll
+define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+entry:                                            ; guard: return 0 when %N == 0
+  %cmp9 = icmp eq i32 %N, 0
+  br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; zero-extend the trip count to i64 once, before the loop
+  %wide.trip.count = zext i32 %N to i64
+  br label %for.body
+
+for.cond.cleanup:                                 ; common exit: reached from %entry and from %for.body
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %c.0.lcssa
+
+; CHECK-LABEL: for.body.lr.ph
+; CHECK: [[COUNT:%[a-z.0-9]+]] = add nsw i64 %wide.trip.count, -1
+; CHECK: %xtraiter = and i64 %wide.trip.count, 3
+; CHECK: [[CMP:%[a-z.0-9]+]] = icmp ult i64 [[COUNT]], 3
+; CHECK: br i1 [[CMP]], label %[[CLEANUP:.*]], label %for.body.lr.ph.new
+
+; CHECK-LABEL: for.body.lr.ph.new:
+; CHECK: %unroll_iter = sub nsw i64 %wide.trip.count, %xtraiter
+; CHECK: br label %for.body
+
+; CHECK: [[CLEANUP]]:
+; CHECK: [[MOD:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 0
+; CHECK: br i1 [[MOD]], label %[[EXIT:.*]], label %[[EPIL_PEEL0_PRE:.*]]
+
+; CHECK: [[EPIL_PEEL0_PRE]]:
+; CHECK: br label %[[EPIL_PEEL0:.*]]
+
+; CHECK: [[EPIL_PEEL0]]:
+; CHECK: [[PEEL_CMP0:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 1
+; CHECK: br i1 [[PEEL_CMP0]], label %[[EPIL_EXIT:.*]], label %[[EPIL_PEEL1:.*]]
+
+; CHECK: [[EPIL_EXIT]]:
+; CHECK: br label %[[EXIT]]
+
+; CHECK: [[EXIT]]:
+; CHECK: ret i32
+
+; CHECK-LABEL: for.body:
+; CHECK: [[INDVAR0:%[a-z.0-9]+]] = phi i64 [ 0, %for.body.lr.ph
+; CHECK: [[ITER:%[a-z.0-9]+]] = phi i64 [ %unroll_iter
+; CHECK: or i64 [[INDVAR0]], 1
+; CHECK: or i64 [[INDVAR0]], 2
+; CHECK: or i64 [[INDVAR0]], 3
+; CHECK: add nuw nsw i64 [[INDVAR0]], 4
+; CHECK: [[SUB:%[a-z.0-9]+]] = add i64 [[ITER]], -4
+; CHECK: [[ITER_CMP:%[a-z.0-9]+]] = icmp eq i64 [[SUB]], 0
+; CHECK: br i1 [[ITER_CMP]], label %[[LOOP_EXIT:.*]], label %for.body
+
+; CHECK: [[EPIL_PEEL1]]:
+; CHECK: [[PEEL_CMP1:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 2
+; CHECK: br i1 [[PEEL_CMP1]], label %[[EPIL_EXIT]], label %[[EPIL_PEEL2:.*]]
+
+; CHECK: [[EPIL_PEEL2]]:
+; CHECK: br label %[[EXIT]]
+
+for.body:                                         ; single-block loop: c += a[i] * b[i]
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %c.010 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]  ; running accumulator, starts at 0
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, %c.010
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count  ; leave when the next index reaches the widened %N
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/scevunroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/scevunroll.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/scevunroll.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/scevunroll.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,207 @@
+; RUN: opt < %s -S -indvars -loop-unroll -verify-loop-info | FileCheck %s
+;
+; Unit tests for loop unrolling using ScalarEvolution to compute trip counts.
+;
+; Indvars is run first to generate an "old" SCEV result. Some unit
+; tests may check that SCEV is properly invalidated between passes.
+
+; Completely unroll loops without a canonical IV.
+;
+; CHECK-LABEL: @sansCanonical(
+; CHECK-NOT: phi
+; CHECK-NOT: icmp
+; CHECK: ret
+define i32 @sansCanonical(i32* %base) nounwind {
+entry:
+  br label %while.body
+
+while.body:                                       ; single-block loop; %iv starts at 10 and is decremented each iteration
+  %iv = phi i64 [ 10, %entry ], [ %iv.next, %while.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.next, %while.body ]
+  %iv.next = add i64 %iv, -1
+  %adr = getelementptr inbounds i32, i32* %base, i64 %iv.next  ; addresses base[iv - 1]
+  %tmp = load i32, i32* %adr, align 8
+  %sum.next = add i32 %sum, %tmp
+  %iv.narrow = trunc i64 %iv.next to i32  ; the exit test is done on the low 32 bits of %iv.next
+  %cmp.i65 = icmp sgt i32 %iv.narrow, 0  ; keep looping while the narrowed value is > 0 (signed)
+  br i1 %cmp.i65, label %while.body, label %exit
+
+exit:
+  ret i32 %sum  ; returns the header phi %sum, not %sum.next
+}
+
+; SCEV unrolling properly handles loops with multiple exits. In this
+; case, the computed trip count based on a canonical IV is *not* for a
+; latch block. Canonical unrolling incorrectly unrolls it, but SCEV
+; unrolling does not.
+;
+; CHECK-LABEL: @earlyLoopTest(
+; CHECK: tail:
+; CHECK-NOT: br
+; CHECK: br i1 %cmp2, label %loop, label %exit2
+define i64 @earlyLoopTest(i64* %base) nounwind {
+entry:
+  br label %loop
+
+loop:                                             ; header and first exiting block; the back edge comes from %tail
+  %iv = phi i64 [ 0, %entry ], [ %inc, %tail ]
+  %s = phi i64 [ 0, %entry ], [ %s.next, %tail ]
+  %adr = getelementptr i64, i64* %base, i64 %iv
+  %val = load i64, i64* %adr
+  %s.next = add i64 %s, %val
+  %inc = add i64 %iv, 1
+  %cmp = icmp ne i64 %inc, 4  ; leaves to %exit1 once %inc == 4
+  br i1 %cmp, label %tail, label %exit1
+
+tail:                                             ; latch: leaves to %exit2 when the loaded %val is 0
+  %cmp2 = icmp ne i64 %val, 0
+  br i1 %cmp2, label %loop, label %exit2
+
+exit1:
+  ret i64 %s  ; the sum before the current iteration's %val was added
+
+exit2:
+  ret i64 %s.next  ; the sum including the current iteration's %val
+}
+
+; SCEV properly unrolls multi-exit loops.
+;
+; CHECK-LABEL: @multiExit(
+; CHECK: getelementptr i32, i32* %base, i32 10
+; CHECK-NEXT: load i32, i32*
+; CHECK: br i1 false, label %l2.10, label %exit1
+; CHECK: l2.10:
+; CHECK-NOT: br
+; CHECK: ret i32
+define i32 @multiExit(i32* %base) nounwind {
+entry:
+  br label %l1
+l1:                                               ; header; both %iv1 and %iv2 start at 0 and step by 1
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l2 ]
+  %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l2 ]
+  %inc1 = add i32 %iv1, 1
+  %inc2 = add i32 %iv2, 1
+  %adr = getelementptr i32, i32* %base, i32 %iv1
+  %val = load i32, i32* %adr
+  %cmp1 = icmp slt i32 %iv1, 5  ; leaves to %exit1 once %iv1 >= 5 (signed)
+  br i1 %cmp1, label %l2, label %exit1
+l2:                                               ; latch; leaves to %exit2 once %iv2 >= 10 (signed)
+  %cmp2 = icmp slt i32 %iv2, 10
+  br i1 %cmp2, label %l1, label %exit2
+exit1:
+  ret i32 1
+exit2:
+  ret i32 %val  ; the value loaded in %l1 during the exiting iteration
+}
+
+
+; SCEV should not unroll a multi-exit loop unless the latch block has
+; a known trip count, regardless of the early exit trip counts. The
+; LoopUnroll utility uses this assumption to optimize the latch
+; block's branch.
+;
+; CHECK-LABEL: @multiExitIncomplete(
+; CHECK: l3:
+; CHECK-NOT: br
+; CHECK:   br i1 %cmp3, label %l1, label %exit3
+define i32 @multiExitIncomplete(i32* %base) nounwind {
+entry:
+  br label %l1
+l1:                                               ; header; both %iv1 and %iv2 start at 0 and step by 1
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %l3 ]
+  %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l3 ]
+  %inc1 = add i32 %iv1, 1
+  %inc2 = add i32 %iv2, 1
+  %adr = getelementptr i32, i32* %base, i32 %iv1
+  %val = load i32, i32* %adr
+  %cmp1 = icmp slt i32 %iv1, 5  ; leaves to %exit1 once %iv1 >= 5 (signed)
+  br i1 %cmp1, label %l2, label %exit1
+l2:                                               ; second early exit; leaves to %exit2 once %iv2 >= 10 (signed)
+  %cmp2 = icmp slt i32 %iv2, 10
+  br i1 %cmp2, label %l3, label %exit2
+l3:                                               ; latch; its exit depends on the loaded %val
+  %cmp3 = icmp ne i32 %val, 0  ; leaves to %exit3 when %val == 0
+  br i1 %cmp3, label %l1, label %exit3
+
+exit1:
+  ret i32 1
+exit2:
+  ret i32 2
+exit3:
+  ret i32 3
+}
+
+; When loop unroll merges a loop exit with one of its parent loop's
+; exits, SCEV must forget its ExitNotTaken info.
+;
+; CHECK-LABEL: @nestedUnroll(
+; CHECK-NOT: br i1
+; CHECK: for.body87:
+define void @nestedUnroll() nounwind {
+entry:
+  br label %for.inc
+
+for.inc:                                          ; self-loop whose back edge has a constant-false condition
+  br i1 false, label %for.inc, label %for.body38.preheader
+
+for.body38.preheader:
+  br label %for.body38
+
+for.body38:                                       ; outer loop header; back edge comes from %for.inc74
+  %i.113 = phi i32 [ %inc76, %for.inc74 ], [ 0, %for.body38.preheader ]
+  %mul48 = mul nsw i32 %i.113, 6
+  br label %for.body43
+
+for.body43:                                       ; inner single-block loop
+  %j.011 = phi i32 [ 0, %for.body38 ], [ %inc72, %for.body43 ]
+  %add49 = add nsw i32 %j.011, %mul48
+  %sh_prom50 = zext i32 %add49 to i64
+  %inc72 = add nsw i32 %j.011, 1
+  br i1 false, label %for.body43, label %for.inc74  ; constant false: the inner back edge is never taken
+
+for.inc74:
+  %inc76 = add nsw i32 %i.113, 1
+  br i1 false, label %for.body38, label %for.body87.preheader  ; constant false: the outer back edge is never taken
+
+for.body87.preheader:
+  br label %for.body87
+
+for.body87:                                       ; unconditional self-loop; the function has no ret
+  br label %for.body87
+}
+
+; PR16130: clang produces incorrect code with loop/expression at -O2
+; rdar:14036816 loop-unroll makes assumptions about undefined behavior
+;
+; The loop latch is assumed to exit after the first iteration because
+; of the induction variable's NSW flag. However, the loop latch's
+; equality test is skipped and the loop exits after the second
+; iteration via the early exit. So loop unrolling cannot assume that
+; the loop latch's exit count of zero is an upper bound on the number
+; of iterations.
+;
+; CHECK-LABEL: @nsw_latch(
+; CHECK: for.body:
+; CHECK: %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ]
+; CHECK: return:
+; CHECK: %b.03.lcssa = phi i32 [ %b.03, %for.body ], [ %b.03, %for.cond ]
+define void @nsw_latch(i32* %a) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond, %entry
+  %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ]
+  %tobool = icmp eq i32 %b.03, 0  ; early exit to %return whenever %b.03 is non-zero
+  %add = add nsw i32 %b.03, 8
+  br i1 %tobool, label %for.cond, label %return
+
+for.cond:                                         ; preds = %for.body
+  %cmp = icmp eq i32 %add, 13  ; latch exit: taken only if %add == 13
+  br i1 %cmp, label %return, label %for.body
+
+return:                                           ; preds = %for.body, %for.cond
+  %b.03.lcssa = phi i32 [ %b.03, %for.body ], [ %b.03, %for.cond ]
+  %retval.0 = phi i32 [ 1, %for.body ], [ 0, %for.cond ]  ; not used by any instruction in this function
+  store i32 %b.03.lcssa, i32* %a, align 4  ; the result is observable only through this store; the function returns void
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/shifted-tripcount.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/shifted-tripcount.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/shifted-tripcount.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/shifted-tripcount.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-unroll -unroll-count=2 -S | FileCheck %s
+
+; LoopUnroll should unroll this loop into one big basic block.
+
+; CHECK: for.body:
+; CHECK: %i.013 = phi i64 [ 0, %entry ], [ %tmp16.1, %for.body ]
+; CHECK: br i1 %exitcond.1, label %for.end, label %for.body
+
+define void @foo(double* nocapture %p, i64 %n) nounwind {
+entry:
+  %mul10 = shl i64 %n, 1                          ; <i64> [#uses=2]
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.013 = phi i64 [ %tmp16, %for.body ], [ 0, %entry ] ; <i64> [#uses=2]
+  %arrayidx7 = getelementptr double, double* %p, i64 %i.013 ; <double*> [#uses=2]
+  %tmp16 = add i64 %i.013, 1                      ; <i64> [#uses=3]
+  %arrayidx = getelementptr double, double* %p, i64 %tmp16 ; <double*> [#uses=1]
+  %tmp4 = load double, double* %arrayidx                  ; <double> [#uses=1]
+  %tmp8 = load double, double* %arrayidx7                 ; <double> [#uses=1]
+  %mul9 = fmul double %tmp8, %tmp4                ; <double> [#uses=1]
+  store double %mul9, double* %arrayidx7          ; p[i] = p[i] * p[i + 1]
+  %exitcond = icmp eq i64 %tmp16, %mul10          ; <i1> [#uses=1]
+  br i1 %exitcond, label %for.end, label %for.body  ; leave once i + 1 == (%n << 1)
+
+for.end:                                          ; preds = %for.body
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This test case documents how runtime loop unrolling handles the case
+; when the backedge-count is -1.
+
+; If %N, the backedge-taken count, is -1 then %0 unsigned-overflows
+; and is 0.  %xtraiter too is 0, signifying that the total trip-count
+; is divisible by 2.  The prologue then branches to the unrolled loop
+; and executes the 2^32 iterations there, in groups of 2.
+
+; EPILOG: entry:
+
+; EPILOG-NEXT: %0 = add i32 %N, 1
+; EPILOG-NEXT: %xtraiter = and i32 %0, 1
+; EPILOG-NEXT: %1 = icmp ult i32 %N, 1
+; EPILOG-NEXT: br i1 %1, label %while.end.unr-lcssa, label %entry.new
+; EPILOG: while.body:
+
+; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG-NEXT: br i1 %lcmp.mod, label %while.body.epil.preheader, label %while.end
+; EPILOG: while.body.epil:
+
+; PROLOG: entry:
+; PROLOG-NEXT: %0 = add i32 %N, 1
+; PROLOG-NEXT: %xtraiter = and i32 %0, 1
+; PROLOG-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG-NEXT: br i1 %lcmp.mod, label %while.body.prol.preheader, label %while.body.prol.loopexit
+; PROLOG: while.body.prol:
+
+; PROLOG: %1 = icmp ult i32 %N, 1
+; PROLOG-NEXT: br i1 %1, label %while.end, label %entry.new
+; PROLOG: while.body:
+
+; Function Attrs: nounwind readnone ssp uwtable
+define i32 @foo(i32 %N) {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %i = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %cmp = icmp eq i32 %i, %N  ; tested on %i before the increment, so the back edge is taken %N times
+  %inc = add i32 %i, 1  ; no nsw/nuw flags on this add
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body
+  ret i32 %i  ; value of %i in the exiting iteration (equal to %N there)
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/unloop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unloop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unloop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unloop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,473 @@
+; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll,verify<loops>' | FileCheck %s
+;
+; Unit tests for LoopInfo::markAsRemoved.
+
+declare i1 @check() nounwind
+
+; Ensure that tail->inner is removed and rely on verify-loopinfo to
+; check soundness.
+;
+; CHECK-LABEL: @skiplevelexit(
+; CHECK: tail:
+; CHECK-NOT: br
+; CHECK: ret void
+define void @skiplevelexit() nounwind {
+entry:
+  br label %outer
+
+outer:                                            ; reached from %entry and from %outer.backedge
+  br label %inner
+
+inner:                                            ; reached from %outer and from %tail
+  %iv = phi i32 [ 0, %outer ], [ %inc, %tail ]
+  %inc = add i32 %iv, 1
+  call zeroext i1 @check()  ; result is discarded
+  br i1 true, label %outer.backedge, label %tail  ; constant true: control always continues to %outer.backedge
+
+tail:
+  br i1 false, label %inner, label %exit  ; constant false: the edge back to %inner is never taken
+
+outer.backedge:
+  br label %outer
+
+exit:
+  ret void
+}
+
+; Remove the middle loop of a triply nested loop tree.
+; Ensure that only the middle loop is removed and rely on verify-loopinfo to
+; check soundness.
+;
+; CHECK-LABEL: @unloopNested(
+; Outer loop control.
+; CHECK: while.body:
+; CHECK: br i1 %cmp3, label %if.then, label %if.end
+; Inner loop control.
+; CHECK: while.end14.i:
+; CHECK: br i1 %call15.i, label %if.end.i, label %exit
+; Middle loop control should no longer reach %while.cond.
+; Now it is the outer loop backedge.
+; CHECK: exit:
+; CHECK: br label %while.cond.outer
+define void @unloopNested() {
+entry:
+  br label %while.cond.outer
+
+while.cond.outer:                                 ; reached from %entry and from %if.else
+  br label %while.cond
+
+while.cond:                                       ; reached from %while.cond.outer and from %exit
+  %cmp = call zeroext i1 @check()
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:
+  %cmp3 = call zeroext i1 @check()
+  br i1 %cmp3, label %if.then, label %if.end
+
+if.then:
+  br label %return
+
+if.end:
+  %cmp.i48 = call zeroext i1 @check()
+  br i1 %cmp.i48, label %if.then.i, label %if.else20.i
+
+if.then.i:
+  %cmp8.i = call zeroext i1 @check()
+  br i1 %cmp8.i, label %merge, label %if.else.i
+
+if.else.i:
+  br label %merge
+
+merge:                                            ; join point for both arms of the %if.end diamond
+  br label %while.cond2.i
+
+while.cond2.i:                                    ; reached from %merge and from %while.cond2.backedge.i
+  %cmp.i = call zeroext i1 @check()
+  br i1 %cmp.i, label %while.cond2.backedge.i, label %while.end.i
+
+while.cond2.backedge.i:
+  br label %while.cond2.i
+
+while.end.i:
+  %cmp1114.i = call zeroext i1 @check()
+  br i1 %cmp1114.i, label %while.body12.lr.ph.i, label %while.end14.i
+
+while.body12.lr.ph.i:
+  br label %while.end14.i
+
+while.end14.i:
+  %call15.i = call zeroext i1 @check()
+  br i1 %call15.i, label %if.end.i, label %exit
+
+if.end.i:
+  br label %while.cond2.backedge.i
+
+exit:
+  br i1 false, label %while.cond, label %if.else  ; constant false: the edge back to %while.cond is never taken
+
+if.else:
+  br label %while.cond.outer
+
+while.end:
+  br label %return
+
+return:
+  ret void
+}
+
+; Remove the middle loop of a deeply nested loop tree.
+; Ensure that only the middle loop is removed and rely on verify-loopinfo to
+; check soundness.
+;
+; This test must be disabled until trip count computation can be optimized...
+; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops.
+; CHECKFIXME-LABEL: @unloopDeepNested(
+; Inner-inner loop control.
+; CHECKFIXME: while.cond.us.i:
+; CHECKFIXME: br i1 %cmp.us.i, label %next_data.exit, label %while.body.us.i
+; CHECKFIXME: if.then.us.i:
+; CHECKFIXME: br label %while.cond.us.i
+; Inner loop tail.
+; CHECKFIXME: if.else.i:
+; CHECKFIXME: br label %while.cond.outer.i
+; Middle loop control (removed).
+; CHECKFIXME: valid_data.exit:
+; CHECKFIXME-NOT: br
+; CHECKFIXME: %cmp = call zeroext i1 @check()
+; Outer loop control.
+; CHECKFIXME: copy_data.exit:
+; CHECKFIXME: br i1 %cmp38, label %if.then39, label %while.cond.outer
+; Outer-outer loop tail.
+; CHECKFIXME: while.cond.outer.outer.backedge:
+; CHECKFIXME: br label %while.cond.outer.outer
+define void @unloopDeepNested() nounwind {
+for.cond8.preheader.i:                            ; entry block of the function
+  %cmp113.i = call zeroext i1 @check()
+  br i1 %cmp113.i, label %make_data.exit, label %for.body13.lr.ph.i
+
+for.body13.lr.ph.i:
+  br label %make_data.exit
+
+make_data.exit:
+  br label %while.cond.outer.outer
+
+while.cond.outer.outer:                           ; reached from %make_data.exit and from %while.cond.outer.outer.backedge
+  br label %while.cond.outer
+
+while.cond.outer:                                 ; reached from %while.cond.outer.outer and from %copy_data.exit
+  br label %while.cond
+
+while.cond:                                       ; reached from %while.cond.outer and from %valid_data.exit
+  br label %while.cond.outer.i
+
+while.cond.outer.i:                               ; reached from %while.cond and from %if.else.i
+  %tmp192.ph.i = call zeroext i1 @check()
+  br i1 %tmp192.ph.i, label %while.cond.outer.split.us.i, label %while.body.loopexit
+
+while.cond.outer.split.us.i:
+  br label %while.cond.us.i
+
+while.cond.us.i:                                  ; reached from %while.cond.outer.split.us.i and from %if.then.us.i
+  %cmp.us.i = call zeroext i1 @check()
+  br i1 %cmp.us.i, label %next_data.exit, label %while.body.us.i
+
+while.body.us.i:
+  %cmp7.us.i = call zeroext i1 @check()
+  br i1 %cmp7.us.i, label %if.then.us.i, label %if.else.i
+
+if.then.us.i:
+  br label %while.cond.us.i
+
+if.else.i:
+  br label %while.cond.outer.i
+
+next_data.exit:
+  %tmp192.ph.i.lcssa28 = call zeroext i1 @check()
+  br i1 %tmp192.ph.i.lcssa28, label %while.end, label %while.body
+
+while.body.loopexit:
+  br label %while.body
+
+while.body:
+  br label %while.cond.i
+
+while.cond.i:                                     ; reached from %while.body and from %if.end.i
+  %cmp.i = call zeroext i1 @check()
+  br i1 %cmp.i, label %valid_data.exit, label %while.body.i
+
+while.body.i:
+  %cmp7.i = call zeroext i1 @check()
+  br i1 %cmp7.i, label %valid_data.exit, label %if.end.i
+
+if.end.i:
+  br label %while.cond.i
+
+valid_data.exit:
+  br i1 true, label %if.then, label %while.cond  ; constant true: the edge back to %while.cond is never taken
+
+if.then:
+  %cmp = call zeroext i1 @check()
+  br i1 %cmp, label %if.then12, label %if.end
+
+if.then12:
+  br label %if.end
+
+if.end:
+  %tobool3.i = call zeroext i1 @check()
+  br i1 %tobool3.i, label %copy_data.exit, label %while.body.lr.ph.i
+
+while.body.lr.ph.i:
+  br label %copy_data.exit
+
+copy_data.exit:
+  %cmp38 = call zeroext i1 @check()
+  br i1 %cmp38, label %if.then39, label %while.cond.outer  ; the false edge returns to %while.cond.outer
+
+if.then39:
+  %cmp5.i = call zeroext i1 @check()
+  br i1 %cmp5.i, label %while.cond.outer.outer.backedge, label %for.cond8.preheader.i8.thread
+
+for.cond8.preheader.i8.thread:
+  br label %while.cond.outer.outer.backedge
+
+while.cond.outer.outer.backedge:
+  br label %while.cond.outer.outer
+
+while.end:                                        ; only block with a ret; reached from %next_data.exit
+  ret void
+}
+
+; Remove a nested loop with irreducible control flow.
+; Ensure that only the middle loop is removed and rely on verify-loopinfo to
+; check soundness.
+;
+; CHECK-LABEL: @unloopIrreducible(
+; Irreducible loop.
+; CHECK: for.inc117:
+; CHECK: br label %for.cond103t
+; Nested loop (removed).
+; CHECK: for.inc159:
+; CHECK: br label %for.inc163
+define void @unloopIrreducible() nounwind { ; CFG-only test body: non-constant branch conditions come from calls to @check()
+
+entry:
+  br label %for.body
+
+for.body: ; outer cycle header; re-entered from %for.inc163
+  %cmp2113 = call zeroext i1 @check()
+  br i1 %cmp2113, label %for.body22.lr.ph, label %for.inc163
+
+for.body22.lr.ph:
+  br label %for.body22
+
+for.body22: ; also the target of the constant-false branch in %for.inc159
+  br label %for.body33
+
+for.body33:
+  br label %for.end
+
+for.end:
+  %cmp424 = call zeroext i1 @check()
+  br i1 %cmp424, label %for.body43.lr.ph, label %for.end93
+
+for.body43.lr.ph:
+  br label %for.end93
+
+for.end93:
+  %cmp96 = call zeroext i1 @check()
+  br i1 %cmp96, label %if.then97, label %for.cond103 ; false edge enters the cycle at %for.cond103, bypassing %for.cond103t
+
+if.then97:
+  br label %for.cond103t ; the other entry into the same cycle
+
+for.cond103t: ; reached from %if.then97 and from %for.inc117
+  br label %for.cond103
+
+for.cond103: ; reached from %for.end93 and %for.cond103t, so the cycle has two entry blocks
+  %cmp105 = call zeroext i1 @check()
+  br i1 %cmp105, label %for.body106, label %for.end120
+
+for.body106:
+  %cmp108 = call zeroext i1 @check()
+  br i1 %cmp108, label %if.then109, label %for.inc117
+
+if.then109:
+  br label %for.inc117
+
+for.inc117:
+  br label %for.cond103t
+
+for.end120:
+  br label %for.inc159
+
+for.inc159:
+  br i1 false, label %for.body22, label %for.cond15.for.inc163_crit_edge ; constant-false condition: the edge back to %for.body22 is never taken
+
+for.cond15.for.inc163_crit_edge:
+  br label %for.inc163
+
+for.inc163:
+  %cmp12 = call zeroext i1 @check()
+  br i1 %cmp12, label %for.body, label %for.end166 ; true edge is the back edge to %for.body
+
+for.end166:
+  ret void
+
+}
+
+; Remove a loop whose exit branches into a sibling loop.
+; Ensure that only the loop is removed and rely on verify-loopinfo to
+; check soundness.
+;
+; CHECK-LABEL: @unloopCriticalEdge(
+; CHECK: while.cond.outer.i.loopexit.split:
+; CHECK: br label %while.body
+; CHECK: while.body:
+; CHECK: br label %for.end78
+define void @unloopCriticalEdge() nounwind { ; CFG-only test body: branch conditions are either undef or constants
+entry:
+  br label %for.cond31
+
+for.cond31: ; re-entered from %Proc2.exit
+  br i1 undef, label %for.body35, label %for.end94
+
+for.body35:
+  br label %while.cond.i.preheader
+
+while.cond.i.preheader: ; also the target of the constant-false branch in %while.cond.outer.i.loopexit.split
+  br i1 undef, label %while.cond.i.preheader.split, label %while.cond.outer.i.loopexit.split
+
+while.cond.i.preheader.split:
+  br label %while.cond.i
+
+while.cond.i:
+  br i1 true, label %while.cond.i, label %while.cond.outer.i.loopexit ; constant-true self edge: the exit to %while.cond.outer.i.loopexit is never taken
+
+while.cond.outer.i.loopexit:
+  br label %while.cond.outer.i.loopexit.split
+
+while.cond.outer.i.loopexit.split:
+  br i1 false, label %while.cond.i.preheader, label %Func2.exit ; constant-false condition: the edge back to %while.cond.i.preheader is never taken
+
+Func2.exit:
+  br label %while.body
+
+while.body:
+  br i1 false, label %while.body, label %while.end ; constant-false condition: the self edge is never taken
+
+while.end:
+  br label %for.end78
+
+for.end78:
+  br i1 undef, label %Proc2.exit, label %for.cond.i.preheader
+
+for.cond.i.preheader:
+  br label %for.cond.i
+
+for.cond.i:
+  br label %for.cond.i ; unconditional self edge: this block has no other successor
+
+Proc2.exit:
+  br label %for.cond31
+
+for.end94:
+  ret void
+}
+
+; Test UnloopUpdater::removeBlocksFromAncestors.
+;
+; Check that the loop backedge is removed from the middle loop 1699,
+; but not the inner loop 1676.
+; CHECK: while.body1694:
+; CHECK:   br label %while.cond1676
+; CHECK: while.end1699:
+; CHECK:   br label %sw.default1711
+define void @removeSubloopBlocks() nounwind { ; CFG-only test body: branch conditions are either undef or constants
+entry:
+  br label %tryagain.outer
+
+tryagain.outer:                                   ; preds = %sw.bb304, %entry
+  br label %tryagain
+
+tryagain:                                         ; preds = %while.end1699, %tryagain.outer
+  br i1 undef, label %sw.bb1669, label %sw.bb304
+
+sw.bb304:                                         ; preds = %tryagain
+  br i1 undef, label %return, label %tryagain.outer
+
+sw.bb1669:                                        ; preds = %tryagain
+  br i1 undef, label %sw.default1711, label %while.cond1676
+
+while.cond1676:                                   ; preds = %while.body1694, %sw.bb1669
+  br i1 undef, label %while.end1699, label %while.body1694
+
+while.body1694:                                   ; preds = %while.cond1676
+  br label %while.cond1676 ; back edge of the cycle headed by %while.cond1676
+
+while.end1699:                                    ; preds = %while.cond1676
+  br i1 false, label %tryagain, label %sw.default1711 ; constant-false condition: the edge back to %tryagain is never taken
+
+sw.default1711:                                   ; preds = %while.end1699, %sw.bb1669
+  br label %defchar
+
+defchar:                                          ; preds = %sw.default1711
+  br i1 undef, label %if.end2413, label %if.then2368
+
+if.then2368:                                      ; preds = %defchar
+  unreachable
+
+if.end2413:                                       ; preds = %defchar
+  unreachable
+
+return:                                           ; preds = %sw.bb304
+  ret void
+}
+
+; PR11335: the most deeply nested block should be removed from the outer loop.
+; CHECK-LABEL: @removeSubloopBlocks2(
+; CHECK: for.cond3:
+; CHECK-NOT: br
+; CHECK: ret void
+define void @removeSubloopBlocks2() nounwind { ; CFG-only test body: all but one conditional branch use a constant false condition
+entry:
+  %tobool.i = icmp ne i32 undef, 0 ; the only non-constant condition; used by the self edge in %for.cond.i
+  br label %lbl_616
+
+lbl_616.loopexit:                                 ; preds = %for.cond
+  br label %lbl_616
+
+lbl_616:                                          ; preds = %lbl_616.loopexit, %entry
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond3, %lbl_616
+  br i1 false, label %for.cond1.preheader, label %lbl_616.loopexit ; constant-false condition: %for.cond1.preheader is never taken
+
+for.cond1.preheader:                              ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1.loopexit:                               ; preds = %for.cond.i
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.cond1.loopexit, %for.cond1.preheader
+  br i1 false, label %for.body2, label %for.cond3 ; constant-false condition: %for.body2 is never taken
+
+for.body2:                                        ; preds = %for.cond1
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %for.cond.i, %for.body2
+  br i1 %tobool.i, label %for.cond.i, label %for.cond1.loopexit
+
+for.cond3:                                        ; preds = %for.cond1
+  br i1 false, label %for.cond, label %if.end ; constant-false condition: the edge back to %for.cond is never taken
+
+if.end:                                           ; preds = %for.cond3
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanup.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanup.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanup.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,85 @@
+; PR23524
+; This test checks that redundancy produced by the loop unroll pass
+; is cleaned up by a later pass.
+; RUN: opt < %s -O2 -S | FileCheck %s
+
+; After loop unroll:
+;       %dec18 = add nsw i32 %dec18.in, -1
+;       ...
+;       %dec18.1 = add nsw i32 %dec18, -1
+; should be merged to:
+;       %dec18.1 = add nsw i32 %dec18.in, -2
+;
+; CHECK-LABEL: @_Z3fn1v(
+; CHECK: %dec18.1 = add nsw i32 %dec18.in, -2
+
+; ModuleID = '<stdin>'
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = global i32 0, align 4
+@c = global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define void @_Z3fn1v() #0 { ; two nested loops; the inner latch carries the !llvm.loop !0 metadata
+entry:
+  %tmp = load i32, i32* @b, align 4
+  %tobool20 = icmp eq i32 %tmp, 0
+  br i1 %tobool20, label %for.end6, label %for.body.lr.ph ; skip both loops when @b is zero
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.cond1.for.cond.loopexit_crit_edge:            ; preds = %for.inc
+  %add.ptr.lcssa = phi i16* [ %add.ptr, %for.inc ]
+  %incdec.ptr.lcssa = phi i8* [ %incdec.ptr, %for.inc ]
+  br label %for.cond.loopexit
+
+for.cond.loopexit:                                ; preds = %for.body, %for.cond1.for.cond.loopexit_crit_edge
+  %r.1.lcssa = phi i16* [ %add.ptr.lcssa, %for.cond1.for.cond.loopexit_crit_edge ], [ %r.022, %for.body ]
+  %a.1.lcssa = phi i8* [ %incdec.ptr.lcssa, %for.cond1.for.cond.loopexit_crit_edge ], [ %a.021, %for.body ]
+  %tmp1 = load i32, i32* @b, align 4 ; @b is reloaded on every outer iteration
+  %tobool = icmp eq i32 %tmp1, 0
+  br i1 %tobool, label %for.cond.for.end6_crit_edge, label %for.body
+
+for.body:                                         ; preds = %for.cond.loopexit, %for.body.lr.ph
+  %r.022 = phi i16* [ undef, %for.body.lr.ph ], [ %r.1.lcssa, %for.cond.loopexit ]
+  %a.021 = phi i8* [ undef, %for.body.lr.ph ], [ %a.1.lcssa, %for.cond.loopexit ]
+  %tmp2 = load i32, i32* @c, align 4 ; initial value of the inner-loop counter %dec18.in
+  %tobool215 = icmp eq i32 %tmp2, 0
+  br i1 %tobool215, label %for.cond.loopexit, label %for.body3.lr.ph
+
+for.body3.lr.ph:                                  ; preds = %for.body
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.inc, %for.body3.lr.ph
+  %dec18.in = phi i32 [ %tmp2, %for.body3.lr.ph ], [ %dec18, %for.inc ]
+  %r.117 = phi i16* [ %r.022, %for.body3.lr.ph ], [ %add.ptr, %for.inc ]
+  %a.116 = phi i8* [ %a.021, %for.body3.lr.ph ], [ %incdec.ptr, %for.inc ]
+  %dec18 = add nsw i32 %dec18.in, -1 ; the decrement that the file header expects to be merged into a single -2 after unrolling
+  %tmp3 = load i8, i8* %a.116, align 1
+  %cmp = icmp eq i8 %tmp3, 0
+  br i1 %cmp, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body3
+  %arrayidx = getelementptr inbounds i16, i16* %r.117, i64 1
+  store i16 0, i16* %arrayidx, align 2
+  store i16 0, i16* %r.117, align 2
+  %arrayidx5 = getelementptr inbounds i16, i16* %r.117, i64 2
+  store i16 0, i16* %arrayidx5, align 2 ; elements 0, 1 and 2 at %r.117 are now zeroed
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body3
+  %incdec.ptr = getelementptr inbounds i8, i8* %a.116, i64 1
+  %add.ptr = getelementptr inbounds i16, i16* %r.117, i64 3
+  %tobool2 = icmp eq i32 %dec18, 0
+  br i1 %tobool2, label %for.cond1.for.cond.loopexit_crit_edge, label %for.body3, !llvm.loop !0
+
+for.cond.for.end6_crit_edge:                      ; preds = %for.cond.loopexit
+  br label %for.end6
+
+for.end6:                                         ; preds = %for.cond.for.end6_crit_edge, %entry
+  ret void
+}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.unroll.count", i32 2}

Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanuppad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanuppad.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanuppad.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-cleanuppad.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -S -loop-unroll %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 { ; loop that invokes @callee with %phi = 0, 1, 2
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc
+  %phi = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  invoke void @callee(i32 %phi)
+          to label %for.inc unwind label %ehcleanup
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nuw nsw i32 %phi, 1
+  %cmp = icmp slt i32 %inc, 3 ; continue while the incremented counter is below 3
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.inc
+  call void @dtor()
+  ret void
+
+ehcleanup:                                        ; preds = %for.body
+  %cp = cleanuppad within none []
+  call void @dtor() [ "funclet"(token %cp) ] ; @dtor is also called on the unwind path, inside the cleanup pad
+  cleanupret from %cp unwind to caller
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: invoke void @callee(i32 0
+
+; CHECK: invoke void @callee(i32 1
+
+; CHECK: invoke void @callee(i32 2
+
+declare void @callee(i32)
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @dtor()

Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-count.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-count.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-count.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-count.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt < %s -S -loop-unroll -unroll-count=2 | FileCheck %s
+; Checks that "llvm.loop.unroll.disable" is set after the loop has been
+; unrolled with a user-specified count.
+;
+; CHECK-LABEL: @foo(
+; CHECK: llvm.loop.unroll.disable
+
+define void @foo(i32* nocapture %a) { ; adds 1 to each of a[0] .. a[63]
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64 ; loop exits once the next index reaches 64
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,59 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-threshold=40 -unroll-max-percent-threshold-boost=100 | FileCheck %s
+
+@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+
+; CHECK-LABEL: @bar_prof
+; CHECK: loop:
+; CHECK: %mul = mul
+; CHECK: %mul.1 = mul
+; CHECK: %mul.2 = mul
+; CHECK: %mul.3 = mul
+; CHECK: loop.epil:
+define i32 @bar_prof(i32* noalias nocapture readonly %src, i64 %c) !prof !1 { ; accumulates src[i] * known_constant[i]; loop bound is the argument %c
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+  %r  = phi i32 [ 0, %entry ], [ %add, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %src_element = load i32, i32* %arrayidx, align 4
+  %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
+  %const_array_element = load i32, i32* %array_const_idx, align 4
+  %mul = mul nsw i32 %src_element, %const_array_element
+  %add = add nsw i32 %mul, %r
+  %inc = add nuw nsw i64 %iv, 1
+  %exitcond86.i = icmp eq i64 %inc, %c
+  br i1 %exitcond86.i, label %loop.end, label %loop, !prof !2 ; with !2 = {1, 1000}: exit edge weight 1, back edge weight 1000
+
+loop.end:
+  %r.lcssa = phi i32 [ %r, %loop ] ; NOTE(review): forwards the phi %r, not the updated %add - confirm this is intentional
+  ret i32 %r.lcssa
+}
+
+; CHECK-LABEL: @bar_prof_flat
+; CHECK-NOT: loop.epil
+define i32 @bar_prof_flat(i32* noalias nocapture readonly %src, i64 %c) !prof !1 { ; same body as @bar_prof except that the successors of the final branch are swapped
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+  %r  = phi i32 [ 0, %entry ], [ %add, %loop ]
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %src_element = load i32, i32* %arrayidx, align 4
+  %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
+  %const_array_element = load i32, i32* %array_const_idx, align 4
+  %mul = mul nsw i32 %src_element, %const_array_element
+  %add = add nsw i32 %mul, %r
+  %inc = add nuw nsw i64 %iv, 1
+  %exitcond86.i = icmp eq i64 %inc, %c
+  br i1 %exitcond86.i, label %loop, label %loop.end, !prof !2 ; with !2 = {1, 1000}: back edge weight 1, exit edge weight 1000
+
+loop.end:
+  %r.lcssa = phi i32 [ %r, %loop ] ; NOTE(review): forwards the phi %r, not the updated %add - confirm this is intentional
+  ret i32 %r.lcssa
+}
+
+!1 = !{!"function_entry_count", i64 1}
+!2 = !{!"branch_weights", i32 1, i32 1000}

Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,107 @@
+; This test exercises that we don't corrupt a loop-analysis when running loop
+; unrolling in a way that deletes a loop. To do that, we first ensure the
+; analysis is cached, then unroll the loop (deleting it) and make sure that the
+; next function doesn't get a cache "hit" for this stale analysis result.
+;
+; RUN: opt -S -passes='loop(require<access-info>),unroll,loop(print-access-info)' -debug-pass-manager < %s 2>&1 | FileCheck %s
+;
+; CHECK: Starting llvm::Function pass manager run.
+; CHECK: Running pass: FunctionToLoopPassAdaptor
+; CHECK: Running analysis: LoopAnalysis
+; CHECK: Running analysis: InnerAnalysisManagerProxy<
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis
+; CHECK: Running analysis: LoopAccessAnalysis on inner1.header
+; CHECK: Finished Loop pass manager run.
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis
+; CHECK: Running analysis: LoopAccessAnalysis on inner2.header
+; CHECK: Finished Loop pass manager run.
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis
+; CHECK: Running analysis: LoopAccessAnalysis on outer.header
+; CHECK: Finished Loop pass manager run.
+; CHECK: Running pass: LoopUnrollPass
+; CHECK: Clearing all analysis results for: inner2.header
+; CHECK: Clearing all analysis results for: outer.header
+; CHECK: Invalidating all non-preserved analyses for: test
+; CHECK: Invalidating all non-preserved analyses for: inner1.header
+; CHECK: Invalidating analysis: LoopAccessAnalysis on inner1.header
+; CHECK: Invalidating all non-preserved analyses for: inner1.header.1
+; CHECK-NOT: Invalidating analysis: LoopAccessAnalysis on inner1.header.1
+; CHECK: Running pass: FunctionToLoopPassAdaptor
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: LoopAccessInfoPrinterPass
+; CHECK: Running analysis: LoopAccessAnalysis on inner1.header
+; CHECK: Loop access info in function 'test':
+; CHECK:   inner1.header:
+; CHECK: Finished Loop pass manager run.
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: LoopAccessInfoPrinterPass
+; CHECK: Running analysis: LoopAccessAnalysis on inner1.header.1
+; CHECK: Loop access info in function 'test':
+; CHECK:   inner1.header.1:
+; CHECK: Finished Loop pass manager run.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(i32 %inner1.count) { ; outer loop containing two sibling inner loops
+; CHECK-LABEL: define void @test(
+bb:
+  br label %outer.ph
+
+outer.ph:
+  br label %outer.header
+
+outer.header:
+  %outer.i = phi i32 [ 0, %outer.ph ], [ %outer.i.next, %outer.latch ]
+  br label %inner1.ph
+
+inner1.ph:
+  br label %inner1.header
+
+inner1.header:
+  %inner1.i = phi i32 [ 0, %inner1.ph ], [ %inner1.i.next, %inner1.header ]
+  %inner1.i.next = add i32 %inner1.i, 1
+  %inner1.cond = icmp eq i32 %inner1.i, %inner1.count ; bound is the function argument, not a constant
+  br i1 %inner1.cond, label %inner1.exit, label %inner1.header
+; We should have two unrolled copies of this loop and nothing else.
+;
+; CHECK-NOT:     icmp eq
+; CHECK-NOT:     br i1
+; CHECK:         %[[COND1:.*]] = icmp eq i32 %{{.*}}, %inner1.count
+; CHECK:         br i1 %[[COND1]],
+; CHECK-NOT:     icmp eq
+; CHECK-NOT:     br i1
+; CHECK:         %[[COND2:.*]] = icmp eq i32 %{{.*}}, %inner1.count
+; CHECK:         br i1 %[[COND2]],
+; CHECK-NOT:     icmp eq
+; CHECK-NOT:     br i1
+
+
+inner1.exit:
+  br label %inner2.ph
+
+inner2.ph:
+  br label %inner2.header
+
+inner2.header:
+  %inner2.i = phi i32 [ 0, %inner2.ph ], [ %inner2.i.next, %inner2.header ]
+  %inner2.i.next = add i32 %inner2.i, 1
+  %inner2.cond = icmp eq i32 %inner2.i, 4 ; constant bound: exits when the counter equals 4
+  br i1 %inner2.cond, label %inner2.exit, label %inner2.header
+
+inner2.exit:
+  br label %outer.latch
+
+outer.latch:
+  %outer.i.next = add i32 %outer.i, 1
+  %outer.cond = icmp eq i32 %outer.i.next, 2 ; constant bound: exits when the incremented counter equals 2
+  br i1 %outer.cond, label %outer.exit, label %outer.header
+
+outer.exit:
+  br label %exit
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-maxcount.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-maxcount.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-maxcount.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-maxcount.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt < %s -S -loop-unroll -unroll-allow-partial -unroll-max-count=1 | FileCheck %s
+; Checks that unroll MaxCount is honored.
+;
+; CHECK-LABEL: @foo(
+; CHECK-LABEL: for.body:
+; CHECK-NEXT: phi
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: load
+; CHECK-NEXT: add
+; CHECK-NEXT: store
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br
+define void @foo(i32* nocapture %a) { ; adds 1 to each of a[0] .. a[1023]
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; loop exits once the next index reaches 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,176 @@
+; RUN: opt < %s -S -loop-unroll -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s
+; RUN: opt < %s -S -loop-unroll | FileCheck -check-prefix=CHECK_NOCOUNT %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso | FileCheck -check-prefix=PGSO %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso=false | FileCheck -check-prefix=NPGSO %s
+
+
+;///////////////////// TEST 1 //////////////////////////////
+
+; This test shows that the loop is unrolled according to the specified
+; unroll factor.
+
+define void @Test1() nounwind { ; counting loop with no memory accesses
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024 ; unsigned compare: exits once the incremented counter reaches 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK_COUNT4-LABEL: @Test1
+; CHECK_COUNT4:      phi
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: icmp
+
+
+;///////////////////// TEST 2 //////////////////////////////
+
+; This test shows that with optnone attribute, the loop is not unrolled
+; even if an unroll factor was specified.
+
+define void @Test2() nounwind optnone noinline { ; same loop as @Test1, but the function carries optnone and noinline
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024 ; unsigned compare: exits once the incremented counter reaches 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK_COUNT4-LABEL: @Test2
+; CHECK_COUNT4:      phi
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: icmp
+
+
+;///////////////////// TEST 3 //////////////////////////////
+
+; This test shows that this loop is fully unrolled by default.
+
+@tab = common global [24 x i32] zeroinitializer, align 4
+
+define i32 @Test3() { ; stores i into tab[i] for i = 0 .. 23, then returns 42
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
+  store i32 %i.05, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, 24 ; 24 matches the element count of @tab
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 42
+}
+
+; CHECK_NOCOUNT-LABEL: @Test3
+; CHECK_NOCOUNT:      store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: ret
+
+
+;///////////////////// TEST 4 //////////////////////////////
+
+; This test shows that with optsize attribute, this loop is not unrolled.
+
+define i32 @Test4() optsize { ; same body as @Test3; the only difference is the optsize attribute
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
+  store i32 %i.05, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, 24 ; 24 matches the element count of @tab
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 42
+}
+
+; CHECK_NOCOUNT-LABEL: @Test4
+; CHECK_NOCOUNT:      phi
+; CHECK_NOCOUNT:      icmp
+
+;///////////////////// TEST 5 //////////////////////////////
+
+; This test shows that with PGO, this loop is cold and not unrolled.
+
+define i32 @Test5() !prof !14 { ; same body as @Test3; !14 gives this function an entry count of 0
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
+  store i32 %i.05, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, 24 ; 24 matches the element count of @tab
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 42
+}
+
+; PGSO-LABEL: @Test5
+; PGSO:      phi
+; PGSO:      icmp
+; NPGSO-LABEL: @Test5
+; NPGSO-NOT:      phi
+; NPGSO-NOT:      icmp
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}

Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,149 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+;
+; Verify that the unrolling pass removes existing unroll count metadata
+; and adds a disable unrolling node after unrolling is complete.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; #pragma clang loop  vectorize(enable) unroll_count(4) vectorize_width(8)
+;
+; Unroll count metadata should be replaced with unroll(disable).  Vectorize
+; metadata should be untouched.
+;
+; CHECK-LABEL: @unroll_count_4(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]]
+define void @unroll_count_4(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!1 = !{!1, !2, !3, !4}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
+!3 = !{!"llvm.loop.unroll.count", i32 4}
+!4 = !{!"llvm.loop.vectorize.width", i32 8}
+
+; #pragma clang loop unroll(full)
+;
+; An unroll disable metadata node is only added for the unroll count case.
+; In this case, the loop has a full unroll metadata but can't be fully unrolled
+; because the trip count is dynamic.  The full unroll metadata should remain
+; after unrolling.
+;
+; CHECK-LABEL: @unroll_full(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]]
+define void @unroll_full(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0                      ; guard: the loop is skipped when %b <= 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !5 ; NOTE(review): this guard branch carries the same !llvm.loop !5 as the latch below -- confirm intended
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4         ; a[iv] += 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b        ; exit bound is the runtime argument %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!5 = !{!5, !6}
+!6 = !{!"llvm.loop.unroll.full"}
+
+; #pragma clang loop unroll(disable)
+;
+; Unroll metadata should not change.
+;
+; CHECK-LABEL: @unroll_disable(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]]
+define void @unroll_disable(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!7 = !{!7, !8}
+!8 = !{!"llvm.loop.unroll.disable"}
+
+; This function contains two loops which share the same llvm.loop metadata node
+; with an llvm.loop.unroll.count 2 hint.  Both loops should be unrolled.  This
+; verifies that adding disable metadata to a loop after unrolling doesn't affect
+; other loops which previously shared the same llvm.loop metadata.
+;
+; CHECK-LABEL: @shared_metadata(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]]
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]]
+define void @shared_metadata(i32* nocapture %List) #0 {
+entry:
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32, i32* %List, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add4 = add nsw i32 %0, 10
+  store i32 %add4, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.body3.1.preheader, label %for.body3, !llvm.loop !9
+
+for.body3.1.preheader:                            ; preds = %for.body3
+  br label %for.body3.1
+
+for.body3.1:                                      ; preds = %for.body3.1.preheader, %for.body3.1
+  %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ]
+  %1 = add nsw i64 %indvars.iv.1, 1
+  %arrayidx.1 = getelementptr inbounds i32, i32* %List, i64 %1
+  %2 = load i32, i32* %arrayidx.1, align 4
+  %add4.1 = add nsw i32 %2, 10
+  store i32 %add4.1, i32* %arrayidx.1, align 4
+  %exitcond.1 = icmp eq i64 %1, 4
+  br i1 %exitcond.1, label %for.inc5.1, label %for.body3.1, !llvm.loop !9
+
+for.inc5.1:                                       ; preds = %for.body3.1
+  ret void
+}
+!9 = !{!9, !10}
+!10 = !{!"llvm.loop.unroll.count", i32 2}
+
+
+; CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[VEC_ENABLE:.*]], ![[WIDTH_8:.*]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
+; CHECK: ![[WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}
+; CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[UNROLL_FULL:.*]]}
+; CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"}
+; CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]]}

Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,372 @@
+; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
+; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
+; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
+;
+; Run loop unrolling twice to verify that loop unrolling metadata is properly
+; removed and further unrolling is disabled after the pass is run once.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; loop4 contains a small loop which should be completely unrolled by
+; the default unrolling heuristics.  It serves as a control for the
+; unroll(disable) pragma test loop4_with_disable.
+;
+; CHECK-LABEL: @loop4(
+; CHECK-NOT: br i1
+define void @loop4(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; #pragma clang loop unroll(disable)
+;
+; CHECK-LABEL: @loop4_with_disable(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop4_with_disable(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.unroll.disable"}
+
+; loop64 has a high enough trip count that it should *not* be unrolled by
+; the default unrolling heuristic.  It serves as the control for the
+; loop64_with_.* pragma tests below.
+;
+; CHECK-LABEL: @loop64(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop64(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; #pragma clang loop unroll(full)
+; Loop should be fully unrolled.
+;
+; CHECK-LABEL: @loop64_with_full(
+; CHECK-NOT: br i1
+define void @loop64_with_full(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!3 = !{!3, !4}
+!4 = !{!"llvm.loop.unroll.full"}
+
+; #pragma clang loop unroll_count(4)
+; Loop should be unrolled 4 times.
+;
+; CHECK-LABEL: @loop64_with_count4(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @loop64_with_count4(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!5 = !{!5, !6}
+!6 = !{!"llvm.loop.unroll.count", i32 4}
+
+; #pragma clang loop unroll(full)
+; Full unrolling is requested, but loop has a runtime trip count so
+; no unrolling should occur.
+;
+; CHECK-LABEL: @runtime_loop_with_full(
+; CHECK: store i32
+; CHECK-NOT: store i32
+define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0                      ; guard: the loop is skipped when %b <= 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 ; NOTE(review): loop metadata !8 is also attached to this guard branch, not only to the latch -- confirm intended
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4         ; a[iv] += 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b        ; exit bound is the runtime argument %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!8 = !{!8, !4}
+
+; #pragma clang loop unroll_count(4)
+; Loop has a runtime trip count.  Runtime unrolling should occur and loop
+; should be duplicated (original and 4x unrolled) if remainder is allowed,
+; otherwise loop should not be unrolled.
+;
+; CHECK-LABEL: @runtime_loop_with_count4(
+; CHECK: for.body
+; CHECK: store
+; REM: store
+; REM: store
+; REM: store
+; CHECK-NOT: store
+; CHECK: br i1
+; REM: for.body.epil:
+; REM: store
+; NOREM-NOT: for.body.epil:
+; NOREM-NOT: store
+; CHECK-NOT: store
+; REM: br i1
+; NOREM-NOT: br i1
+define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!9 = !{!9, !6}
+
+; #pragma clang loop unroll_count(1)
+; Loop should not be unrolled
+;
+; CHECK-LABEL: @unroll_1(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @unroll_1(i32* nocapture %a, i32 %b) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!10 = !{!10, !11}
+!11 = !{!"llvm.loop.unroll.count", i32 1}
+
+; #pragma clang loop unroll(full)
+; Loop has very high loop count (1 million) and full unrolling was requested.
+; Loop should be unrolled up to the pragma threshold, but not completely.
+;
+; CHECK-LABEL: @unroll_1M(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1
+define void @unroll_1M(i32* nocapture %a, i32 %b) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!12 = !{!12, !4}
+
+; #pragma clang loop unroll(enable)
+; Loop should be fully unrolled.
+;
+; CHECK-LABEL: @loop64_with_enable(
+; CHECK-NOT: br i1
+define void @loop64_with_enable(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!13 = !{!13, !14}
+!14 = !{!"llvm.loop.unroll.enable"}
+
+; #pragma clang loop unroll(enable)
+; Loop has a runtime trip count and should be runtime unrolled and duplicated
+; (original and 8x) if remainder is allowed, otherwise it should not be
+; unrolled.
+;
+; CHECK-LABEL: @runtime_loop_with_enable(
+; CHECK: for.body:
+; CHECK: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; REM: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+; REM: for.body.epil:
+; NOREM-NOT: for.body.epil:
+; REM: store
+; CHECK-NOT: store
+; REM: br i1
+; NOREM-NOT: br i1
+define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0                      ; guard: the loop is skipped when %b <= 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 ; NOTE(review): guard branch is tagged !8 (the unroll.full loop ID defined earlier in this file) while the latch uses !15 -- confirm intended
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4         ; a[iv] += 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b        ; exit bound is the runtime argument %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 ; !15 references !14, the llvm.loop.unroll.enable hint
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!15 = !{!15, !14}
+
+; #pragma clang loop unroll_count(3)
+; Loop has a runtime trip count.  Runtime unrolling should occur and loop
+; should be duplicated (original and 3x unrolled) if remainder is allowed,
+; otherwise it should not be unrolled.
+;
+; CHECK-LABEL: @runtime_loop_with_count3(
+; CHECK: for.body
+; CHECK: store
+; REM: store
+; REM: store
+; CHECK-NOT: store
+; CHECK: br i1
+; REM: for.body.epil:
+; REM: store
+; NOREM-NOT: for.body.epil:
+; NOREM-NOT: store
+; CHECK-NOT: store
+; REM: br i1
+define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!16 = !{!16, !17}
+!17 = !{!"llvm.loop.unroll.count", i32 3}

Added: llvm/trunk/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s
+; RUN: opt -S < %s -passes='require<opt-remark-emit>,unroll,require<block-freq>' | FileCheck %s
+; Crasher from PR20987.
+
+; CHECK: define void @update_loop_info_in_subloops
+; CHECK: entry:
+; CHECK: L:
+; CHECK: L.inner:
+; CHECK: L.inner.latch:
+; CHECK: L.latch:
+; CHECK: L.inner.1:
+; CHECK: L.inner.latch.1:
+; CHECK: L.latch.1:
+
+define void @update_loop_info_in_subloops() {
+entry:
+  br label %L
+
+L:
+  %0 = phi i64 [ 1, %entry ], [ %1, %L.latch ]
+  br label %L.inner
+
+L.inner:
+  br label %L.inner.latch
+
+L.inner.latch:
+  br i1 false, label %L.latch, label %L.inner
+
+L.latch:
+  %1 = add i64 %0, 1
+  %2 = icmp eq i64 %1, 3
+  br i1 %2, label %exit, label %L
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/dependencies.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/dependencies.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/dependencies.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/dependencies.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,470 @@
+; RUN: opt -basicaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: fore_aft_less
+; CHECK: %j = phi
+; CHECK: %j.1 = phi
+; CHECK: %j.2 = phi
+; CHECK: %j.3 = phi
+define void @fore_aft_less(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp sgt i32 %N, 0                       ; both loops are skipped when %N <= 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4            ; A[i] = 1, written before the inner loop
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ] ; running sum, restarts at 0 for every %i
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum                   ; sum += B[j] * i
+  %add6 = add nuw nsw i32 %j, 1
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %add72 = add nuw nsw i32 %i, -1                 ; i - 1; NOTE(review): nuw on an add of -1 wraps unsigned for any %i != 0 -- confirm the flag is intended
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4        ; A[i - 1] = sum, written after the inner loop
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: fore_aft_eq
+; CHECK: %j = phi
+; CHECK: %j.1 = phi
+; CHECK: %j.2 = phi
+; CHECK: %j.3 = phi
+define void @fore_aft_eq(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp sgt i32 %N, 0                       ; both loops are skipped when %N <= 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4            ; A[i] = 1, written before the inner loop
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ] ; running sum, restarts at 0 for every %i
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum                   ; sum += B[j] * i
+  %add6 = add nuw nsw i32 %j, 1
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %add72 = add nuw nsw i32 %i, 0                  ; unused in this function: the GEP below indexes with %i directly
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add, i32* %arrayidx8, align 4        ; A[i] = sum, written after the inner loop
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: fore_aft_more
+; CHECK: %j = phi
+; CHECK-NOT: %j.1 = phi
+define void @fore_aft_more(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp sgt i32 %N, 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum
+  %add6 = add nuw nsw i32 %j, 1
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %add72 = add nuw nsw i32 %i, 1
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: fore_sub_less
+; CHECK: %j = phi
+; CHECK: %j.1 = phi
+; CHECK: %j.2 = phi
+; CHECK: %j.3 = phi
+define void @fore_sub_less(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp sgt i32 %N, 0                       ; both loops are skipped when %N <= 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4            ; A[i] = 1, written before the inner loop
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ] ; running sum, restarts at 0 for every %i
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum                   ; sum += B[j] * i
+  %add72 = add nuw nsw i32 %i, -1                 ; i - 1; NOTE(review): nuw on an add of -1 wraps unsigned for any %i != 0 -- confirm the flag is intended
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4        ; A[i - 1] = sum, written on every inner-loop iteration
+  %add6 = add nuw nsw i32 %j, 1
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: fore_sub_eq
+; CHECK: %j = phi
+; CHECK: %j.1 = phi
+; CHECK: %j.2 = phi
+; CHECK: %j.3 = phi
+define void @fore_sub_eq(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp sgt i32 %N, 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum
+  %add72 = add nuw nsw i32 %i, 0
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4
+  %add6 = add nuw nsw i32 %j, 1
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: fore_sub_more
+; CHECK: %j = phi
+; CHECK-NOT: %j.1 = phi
+define void @fore_sub_more(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp sgt i32 %N, 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum
+  %add72 = add nuw nsw i32 %i, 1
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4
+  %add6 = add nuw nsw i32 %j, 1
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: sub_aft_less
+; CHECK: %j = phi
+; CHECK: %j.1 = phi
+; CHECK: %j.2 = phi
+; CHECK: %j.3 = phi
+define void @sub_aft_less(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp sgt i32 %N, 0                       ; both loops are skipped when %N <= 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ] ; running sum, restarts at 0 for every %i
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum                   ; sum += B[j] * i
+  %add6 = add nuw nsw i32 %j, 1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4            ; A[i] = 1, written on every inner-loop iteration
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %add72 = add nuw nsw i32 %i, -1                 ; i - 1; NOTE(review): nuw on an add of -1 wraps unsigned for any %i != 0 -- confirm the flag is intended
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4        ; A[i - 1] = sum, written after the inner loop
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: sub_aft_eq
+; CHECK: %j = phi
+; CHECK: %j.1 = phi
+; CHECK: %j.2 = phi
+; CHECK: %j.3 = phi
+define void @sub_aft_eq(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp sgt i32 %N, 0                       ; both loops are skipped when %N <= 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ] ; running sum, restarts at 0 for every %i
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum                   ; sum += B[j] * i
+  %add6 = add nuw nsw i32 %j, 1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4            ; A[i] = 1, written on every inner-loop iteration
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %add72 = add nuw nsw i32 %i, 0                  ; unused in this function: the GEP below indexes with %i directly
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add, i32* %arrayidx8, align 4        ; A[i] = sum, written after the inner loop
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: sub_aft_more
+; CHECK: %j = phi
+; CHECK-NOT: %j.1 = phi
+define void @sub_aft_more(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {  ; inner loop stores to A[i], latch stores to A[i+1]; no second copy of the inner phi is expected
+entry:
+  %cmp = icmp sgt i32 %N, 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum
+  %add6 = add nuw nsw i32 %j, 1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4  ; inner loop writes A[i]
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %add72 = add nuw nsw i32 %i, 1  ; index one past the element written by the inner loop
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4  ; latch writes A[i+1]
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: sub_sub_less
+; CHECK: %j = phi
+; CHECK-NOT: %j.1 = phi
+define void @sub_sub_less(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {  ; inner loop stores to both A[i] and A[i-1]; no second copy of the inner phi is expected
+entry:
+  %cmp = icmp sgt i32 %N, 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum
+  %add6 = add nuw nsw i32 %j, 1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4  ; first inner-loop store: A[i]
+  %add72 = add nuw nsw i32 %i, -1  ; index one below the element written above
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4  ; second inner-loop store: A[i-1]
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: sub_sub_eq
+; CHECK: %j = phi
+; CHECK: %j.1 = phi
+define void @sub_sub_eq(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {  ; both inner-loop stores hit A[i]; a second copy of the inner phi is expected
+entry:
+  %cmp = icmp sgt i32 %N, 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum
+  %add6 = add nuw nsw i32 %j, 1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4  ; first inner-loop store: A[i]
+  %add72 = add nuw nsw i32 %i, 0  ; i + 0, i.e. the same index as above
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4  ; second inner-loop store: A[i] again
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}
+
+
+; CHECK-LABEL: sub_sub_more
+; CHECK: %j = phi
+; CHECK-NOT: %j.1 = phi
+define void @sub_sub_more(i32* noalias nocapture %A, i32 %N, i32* noalias nocapture readonly %B) {  ; inner loop stores to both A[i] and A[i+1]; no second copy of the inner phi is expected
+entry:
+  %cmp = icmp sgt i32 %N, 0
+  br i1 %cmp, label %for.outer, label %cleanup
+
+for.outer:
+  %i = phi i32 [ %add7, %for.latch ], [ 0, %entry ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
+  %sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum
+  %add6 = add nuw nsw i32 %j, 1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 1, i32* %arrayidx, align 4  ; first inner-loop store: A[i]
+  %add72 = add nuw nsw i32 %i, 1  ; index one past the element written above
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add72
+  store i32 %add, i32* %arrayidx8, align 4  ; second inner-loop store: A[i+1]
+  %exitcond = icmp eq i32 %add6, %N
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add7 = add nuw nsw i32 %i, 1
+  %exitcond29 = icmp eq i32 %add7, %N
+  br i1 %exitcond29, label %cleanup, label %for.outer
+
+cleanup:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/disable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/disable.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/disable.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/disable.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,741 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -pass-remarks=loop-unroll-and-jam < %s -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+;; Common check for all tests. None should be unroll and jammed
+; CHECK-NOT: remark: {{.*}} unroll and jammed
+
+
+; CHECK-LABEL: disabled1
+; Tests for(i) { sum = A[i]; for(j) sum += B[j]; A[i+1] = sum; }
+; The A[i] (load) to A[i+1] (store) dependency should block unroll-and-jam
+define void @disabled1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i.029 = phi i32 [ %add10, %for.latch ], [ 0, %for.preheader ]
+; CHECK: %j.026 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp127 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp127, %cmp
+  br i1 %or.cond, label %for.preheader, label %return
+
+for.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.029 = phi i32 [ %add10, %for.latch ], [ 0, %for.preheader ]
+  %b.028 = phi i32 [ %inc8, %for.latch ], [ 1, %for.preheader ]  ; starts at 1 and steps with %i.029, so b == i + 1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.029
+  %0 = load i32, i32* %arrayidx, align 4  ; sum = A[i]
+  br label %for.inner
+
+for.inner:
+  %j.026 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1.025 = phi i32 [ %0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %B, i32 %j.026
+  %1 = load i32, i32* %arrayidx6, align 4
+  %add = add i32 %1, %sum1.025
+  %inc = add nuw i32 %j.026, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %b.028
+  store i32 %add, i32* %arrayidx7, align 4  ; A[i+1] = sum
+  %inc8 = add nuw nsw i32 %b.028, 1
+  %add10 = add nuw nsw i32 %i.029, 1
+  %exitcond30 = icmp eq i32 %add10, %I
+  br i1 %exitcond30, label %return, label %for.outer
+
+return:
+  ret void
+}
+
+
+; CHECK-LABEL: disabled2
+; Tests an incompatible block layout (for.outer can branch straight to for.latch, skipping for.inner)
+; FIXME: Make this work
+define void @disabled2(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i.032 = phi i32 [ %add13, %for.latch ], [ 0, %for.preheader ]
+; CHECK: %j.030 = phi i32 [ %inc, %for.inner ], [ 0, %for.inner.preheader ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp131 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp131, %cmp
+  br i1 %or.cond, label %for.preheader, label %for.end14
+
+for.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.032 = phi i32 [ %add13, %for.latch ], [ 0, %for.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.032
+  %0 = load i32, i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %for.latch, label %for.inner  ; B[i] == 0 bypasses the inner loop
+
+for.inner:
+  %j.030 = phi i32 [ %inc, %for.inner ], [ 0, %for.outer ]
+  %sum1.029 = phi i32 [ %sum1.1, %for.inner ], [ 0, %for.outer ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %B, i32 %j.030
+  %1 = load i32, i32* %arrayidx6, align 4
+  %tobool7 = icmp eq i32 %1, 0
+  %sub = add i32 %sum1.029, 10  ; note: named %sub but is an add
+  %add = sub i32 %sub, %1  ; note: named %add but is a sub
+  %sum1.1 = select i1 %tobool7, i32 %sum1.029, i32 %add
+  %inc = add nuw i32 %j.030, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %sum1.1.lcssa = phi i32 [ 0, %for.outer ], [ %sum1.1, %for.inner ]  ; 0 when the inner loop was skipped
+  %arrayidx11 = getelementptr inbounds i32, i32* %A, i32 %i.032
+  store i32 %sum1.1.lcssa, i32* %arrayidx11, align 4
+  %add13 = add nuw i32 %i.032, 1
+  %exitcond33 = icmp eq i32 %add13, %I
+  br i1 %exitcond33, label %for.end14, label %for.outer
+
+for.end14:
+  ret void
+}
+
+
+; CHECK-LABEL: disabled3
+; Tests loop-carried dependencies through the local array S
+define void @disabled3(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i.029 = phi i32 [ 0, %for.preheader ], [ %add12, %for.latch ]
+; CHECK: %j.027 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:
+  %S = alloca [4 x i32], align 4
+  %cmp = icmp eq i32 %J, 0
+  br i1 %cmp, label %return, label %if.end
+
+if.end:
+  %0 = bitcast [4 x i32]* %S to i8*
+  %cmp128 = icmp eq i32 %I, 0
+  br i1 %cmp128, label %for.cond.cleanup, label %for.preheader
+
+for.preheader:
+  %arrayidx9 = getelementptr inbounds [4 x i32], [4 x i32]* %S, i32 0, i32 0  ; &S[0], read in for.latch
+  br label %for.outer
+
+for.cond.cleanup:
+  br label %return
+
+for.outer:
+  %i.029 = phi i32 [ 0, %for.preheader ], [ %add12, %for.latch ]
+  br label %for.inner
+
+for.inner:
+  %j.027 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j.027
+  %l2 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %j.027, %i.029
+  %rem = urem i32 %add, %J
+  %arrayidx6 = getelementptr inbounds i32, i32* %B, i32 %rem
+  %l3 = load i32, i32* %arrayidx6, align 4
+  %mul = mul i32 %l3, %l2
+  %rem7 = urem i32 %j.027, 3
+  %arrayidx8 = getelementptr inbounds [4 x i32], [4 x i32]* %S, i32 0, i32 %rem7
+  store i32 %mul, i32* %arrayidx8, align 4  ; S[j % 3] = B[(i + j) % J] * B[j]
+  %inc = add nuw i32 %j.027, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %l1 = load i32, i32* %arrayidx9, align 4  ; reads S[0], which the inner loop writes
+  %arrayidx10 = getelementptr inbounds i32, i32* %A, i32 %i.029
+  store i32 %l1, i32* %arrayidx10, align 4
+  %add12 = add nuw i32 %i.029, 1
+  %exitcond31 = icmp eq i32 %add12, %I
+  br i1 %exitcond31, label %for.cond.cleanup, label %for.outer
+
+return:
+  ret void
+}
+
+
+; CHECK-LABEL: disabled4
+; Inner loop induction variable is not consistent: its bound is the outer IV,
+; i.e. for(i = 0..n) for (j = 0..i) sum+=B[j]
+define void @disabled4(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %indvars.iv = phi i32 [ %indvars.iv.next, %for.latch ], [ 1, %for.preheader ]
+; CHECK: %j.021 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ugt i32 %I, 1
+  %or.cond = and i1 %cmp122, %cmp
+  br i1 %or.cond, label %for.preheader, label %for.end9
+
+for.preheader:
+  br label %for.outer
+
+for.outer:
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.latch ], [ 1, %for.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j.021 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1.020 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j.021
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %0, %sum1.020
+  %inc = add nuw i32 %j.021, 1
+  %exitcond = icmp eq i32 %inc, %indvars.iv  ; inner bound is the outer IV, not %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
+  store i32 %add, i32* %arrayidx6, align 4
+  %indvars.iv.next = add nuw i32 %indvars.iv, 1
+  %exitcond24 = icmp eq i32 %indvars.iv.next, %I
+  br i1 %exitcond24, label %for.end9, label %for.outer
+
+for.end9:
+  ret void
+}
+
+
+; CHECK-LABEL: disabled5
+; Test odd uses of phi nodes where the outer IV cannot be moved into Fore as it hits a PHI
+@f = hidden global i32 0, align 4
+define i32 @disabled5() #0 {
+; CHECK: %0 = phi i32 [ %f.promoted10, %entry ], [ 2, %for.latch ]
+; CHECK: %1 = phi i32 [ %0, %for.outer ], [ 2, %for.inner ]
+entry:
+  %f.promoted10 = load i32, i32* @f, align 4  ; initial value feeding the outer phi %0
+  br label %for.outer
+
+for.outer:
+  %0 = phi i32 [ %f.promoted10, %entry ], [ 2, %for.latch ]
+  %d.018 = phi i16 [ 0, %entry ], [ %odd.lcssa, %for.latch ]
+  %inc5.sink9 = phi i32 [ 2, %entry ], [ %inc5, %for.latch ]  ; outer counter, starts at 2
+  br label %for.inner
+
+for.inner:
+  %1 = phi i32 [ %0, %for.outer ], [ 2, %for.inner ]  ; inner phi fed by the outer phi %0
+  %inc.sink8 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %inc = add nuw nsw i32 %inc.sink8, 1
+  %exitcond = icmp ne i32 %inc, 7
+  br i1 %exitcond, label %for.inner, label %for.latch
+
+for.latch:
+  %.lcssa = phi i32 [ %1, %for.inner ]
+  %odd.lcssa = phi i16 [ 1, %for.inner ]
+  %inc5 = add nuw nsw i32 %inc5.sink9, 1
+  %exitcond11 = icmp ne i32 %inc5, 7
+  br i1 %exitcond11, label %for.outer, label %for.end
+
+for.end:
+  %.lcssa.lcssa = phi i32 [ %.lcssa, %for.latch ]
+  %inc.lcssa.lcssa = phi i32 [ 7, %for.latch ]
+  ret i32 0
+}
+
+
+; CHECK-LABEL: disabled6
+; There is a dependency in here, between @d6 and %0 (loaded from @f6, which points at @d6)
+@d6 = hidden global i16 5, align 2
+@f6 = hidden global i16* @d6, align 4
+define i32 @disabled6() #0 {
+; CHECK: %inc8.sink14.i = phi i16 [ 1, %entry ], [ %inc8.i, %for.cond.cleanup.i ]
+; CHECK: %c.013.i = phi i32 [ 0, %for.body.i ], [ %inc.i, %for.body6.i ]
+entry:
+  store i16 1, i16* @d6, align 2
+  %0 = load i16*, i16** @f6, align 4  ; pointer read from @f6, whose initializer is @d6
+  br label %for.body.i
+
+for.body.i:
+  %inc8.sink14.i = phi i16 [ 1, %entry ], [ %inc8.i, %for.cond.cleanup.i ]
+  %1 = load i16, i16* %0, align 2  ; outer-loop load through %0
+  br label %for.body6.i
+
+for.cond.cleanup.i:
+  %inc8.i = add nuw nsw i16 %inc8.sink14.i, 1
+  store i16 %inc8.i, i16* @d6, align 2  ; outer-latch store to @d6
+  %cmp.i = icmp ult i16 %inc8.i, 6
+  br i1 %cmp.i, label %for.body.i, label %test.exit
+
+for.body6.i:
+  %c.013.i = phi i32 [ 0, %for.body.i ], [ %inc.i, %for.body6.i ]
+  %inc.i = add nuw nsw i32 %c.013.i, 1
+  %exitcond.i = icmp eq i32 %inc.i, 7
+  br i1 %exitcond.i, label %for.cond.cleanup.i, label %for.body6.i
+
+test.exit:
+  %conv2.i = sext i16 %1 to i32
+  ret i32 0
+}
+
+
+; CHECK-LABEL: disabled7
+; Has negative output dependency (stores to A[i] and A[i-1] in the outer loop body)
+define void @disabled7(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i.028 = phi i32 [ %add11, %for.cond3.for.cond.cleanup5_crit_edge ], [ 0, %for.body.preheader ]
+; CHECK: %j.026 = phi i32 [ 0, %for.body ], [ %add9, %for.body6 ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp127 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp127, %cmp
+  br i1 %or.cond, label %for.body.preheader, label %for.end12
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %i.028 = phi i32 [ %add11, %for.cond3.for.cond.cleanup5_crit_edge ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.028
+  store i32 0, i32* %arrayidx, align 4  ; A[i] = 0
+  %sub = add i32 %i.028, -1
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %sub
+  store i32 2, i32* %arrayidx2, align 4  ; A[i-1] = 2
+  br label %for.body6
+
+for.cond3.for.cond.cleanup5_crit_edge:
+  store i32 %add, i32* %arrayidx, align 4  ; A[i] = sum, after the inner loop
+  %add11 = add nuw i32 %i.028, 1
+  %exitcond29 = icmp eq i32 %add11, %I
+  br i1 %exitcond29, label %for.end12, label %for.body
+
+for.body6:
+  %0 = phi i32 [ 0, %for.body ], [ %add, %for.body6 ]
+  %j.026 = phi i32 [ 0, %for.body ], [ %add9, %for.body6 ]
+  %arrayidx7 = getelementptr inbounds i32, i32* %B, i32 %j.026
+  %1 = load i32, i32* %arrayidx7, align 4
+  %add = add i32 %1, %0
+  %add9 = add nuw i32 %j.026, 1
+  %exitcond = icmp eq i32 %add9, %J
+  br i1 %exitcond, label %for.cond3.for.cond.cleanup5_crit_edge, label %for.body6
+
+for.end12:
+  ret void
+}
+
+
+; CHECK-LABEL: disabled8
+; Same stores as disabled7 (A[i] and A[i-1]), wrapped in an extra outer loop nest (%x.037)
+define void @disabled8(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i.036 = phi i32 [ %add15, %for.latch ], [ 0, %for.body ]
+; CHECK: %j.034 = phi i32 [ 0, %for.outer ], [ %add13, %for.inner ]
+entry:
+  %cmp = icmp eq i32 %J, 0
+  %cmp335 = icmp eq i32 %I, 0
+  %or.cond = or i1 %cmp, %cmp335
+  br i1 %or.cond, label %for.end18, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %x.037 = phi i32 [ %inc, %for.cond.cleanup4 ], [ 0, %for.body.preheader ]  ; outermost counter, runs 0..4
+  br label %for.outer
+
+for.cond.cleanup4:
+  %inc = add nuw nsw i32 %x.037, 1
+  %exitcond40 = icmp eq i32 %inc, 5
+  br i1 %exitcond40, label %for.end18, label %for.body
+
+for.outer:
+  %i.036 = phi i32 [ %add15, %for.latch ], [ 0, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.036
+  store i32 0, i32* %arrayidx, align 4  ; A[i] = 0
+  %sub = add i32 %i.036, -1
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %sub
+  store i32 2, i32* %arrayidx6, align 4  ; A[i-1] = 2
+  br label %for.inner
+
+for.latch:
+  store i32 %add, i32* %arrayidx, align 4  ; A[i] = sum, after the inner loop
+  %add15 = add nuw i32 %i.036, 1
+  %exitcond38 = icmp eq i32 %add15, %I
+  br i1 %exitcond38, label %for.cond.cleanup4, label %for.outer
+
+for.inner:
+  %0 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %j.034 = phi i32 [ 0, %for.outer ], [ %add13, %for.inner ]
+  %arrayidx11 = getelementptr inbounds i32, i32* %B, i32 %j.034
+  %1 = load i32, i32* %arrayidx11, align 4
+  %add = add i32 %1, %0
+  %add13 = add nuw i32 %j.034, 1
+  %exitcond = icmp eq i32 %add13, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.end18:
+  ret void
+}
+
+
+; CHECK-LABEL: disabled9
+; A and B are not marked noalias here, so aliasing between them cannot be ruled out
+define void @disabled9(i32 %I, i32 %J, i32* nocapture %A, i32* nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4  ; reads B[j]
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4  ; writes A[i]
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: disable10
+; Simple call: the inner loop calls the external function @f10
+declare void @f10(i32, i32) #0
+define void @disable10(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  tail call void @f10(i32 %i, i32 %j) nounwind  ; the call under test, made on every inner iteration
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: disable11
+; Volatile load in the inner loop
+define void @disable11(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load volatile i32, i32* %arrayidx, align 4  ; the volatile access under test
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: disable12
+; Multiple aft blocks: for.latch, for.latch2 and for.latch3 all follow the inner loop
+define void @disable12(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %add8, %for.latch3 ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch3 ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4
+  %cmpl = icmp eq i32 %add.lcssa, 10
+  br i1 %cmpl, label %for.latch2, label %for.latch3  ; conditional branch splits the code after the inner loop
+
+for.latch2:
+  br label %for.latch3
+
+for.latch3:
+  %add8 = add nuw i32 %i, 1  ; the outer back edge lives in this block
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: disable13
+; Two subloops: for.inner is followed by a second inner loop, for.inner2
+define void @disable13(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+; CHECK: %j2 = phi i32 [ %inc2, %for.inner2 ], [ 0, %for.inner2.preheader ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.inner2, label %for.inner  ; first inner loop exits into the second one
+
+for.inner2:
+  %j2 = phi i32 [ 0, %for.inner ], [ %inc2, %for.inner2 ]
+  %sum12 = phi i32 [ 0, %for.inner ], [ %add2, %for.inner2 ]
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %j2
+  %l0 = load i32, i32* %arrayidx2, align 4
+  %add2 = add i32 %l0, %sum12
+  %inc2 = add nuw i32 %j2, 1
+  %exitcond2 = icmp eq i32 %inc2, %J
+  br i1 %exitcond2, label %for.latch, label %for.inner2
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner2 ]  ; carries the first loop's sum (%add), not %add2
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: disable14
+; Multiple exiting blocks: for.outer and for.latch both branch to for.end.loopexit
+define void @disable14(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ %inc, %for.inner ], [ 0, %for.inner.preheader ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  %add8 = add nuw i32 %i, 1
+  %exitcond23 = icmp eq i32 %add8, %I
+  br i1 %exitcond23, label %for.end.loopexit, label %for.inner  ; first exit, from the outer header
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer  ; second exit, from the latch
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: disable15
+; Latch != exit: for.outer is the exiting block, for.latch branches back unconditionally
+define void @disable15(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ %inc, %for.inner ], [ 0, %for.inner.preheader ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.inner  ; the only loop exit
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4
+  br label %for.outer  ; unconditional back edge: the latch does not exit
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: disable16
+; Cannot move %other before the inner loop: it depends on a load in for.latch that follows the store to A[i]
+define void @disable16(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]
+  %otherphi = phi i32 [ %other, %for.latch ], [ 0, %for.outer.preheader ]  ; outer-loop recurrence fed by %other from the latch
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4  ; A[i] = sum
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  %loadarr = getelementptr inbounds i32, i32* %A, i32 %i  ; unused: the load below reuses %arrayidx6 (same address)
+  %load = load i32, i32* %arrayidx6, align 4  ; reads back A[i] after the store above
+  %other = add i32 %otherphi, %load
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=2 -S < %s | FileCheck %s
+;
+; Check that the disable_nonforced loop property is honored by
+; loop unroll-and-jam.
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: disable_nonforced
+; CHECK: load
+; CHECK-NOT: load
+define void @disable_nonforced(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {  ; exactly one load is expected in the output, i.e. the loop is left as is
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; the only load in the function
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !0  ; outer-loop latch carries the loop metadata
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !{!"llvm.loop.disable_nonforced"}}  ; the only loop property: disable_nonforced

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced_count.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced_count.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced_count.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced_count.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -S < %s | FileCheck %s
+;
+; Verify that the llvm.loop.unroll_and_jam.count loop property overrides
+; llvm.loop.disable_nonforced.
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: @disable_nonforced_enable(
+; CHECK: load
+; CHECK: load
+; CHECK-NOT: load
+; CHECK: br i1
+define void @disable_nonforced_enable(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j]: the only load in the input, indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !0  ; outer-loop back edge carries loop ID !0
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !{!"llvm.loop.disable_nonforced"}, !{!"llvm.loop.unroll_and_jam.count", i32 2}}  ; disable_nonforced together with an explicit unroll_and_jam.count of 2

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced_enable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced_enable.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced_enable.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/disable_nonforced_enable.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=2 -S < %s | FileCheck %s
+;
+; Verify that the llvm.loop.unroll_and_jam.enable loop property
+; overrides llvm.loop.disable_nonforced.
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: disable_nonforced_enable
+; CHECK: load
+; CHECK: load
+; CHECK-NOT: load
+; CHECK: br i1
+define void @disable_nonforced_enable(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j]: the only load in the input, indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !0  ; outer-loop back edge carries loop ID !0
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !{!"llvm.loop.disable_nonforced"}, !{!"llvm.loop.unroll_and_jam.enable"}}  ; disable_nonforced together with an explicit unroll_and_jam.enable

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/followup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/followup.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/followup.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/followup.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,66 @@
+; RUN: opt -basicaa -tbaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
+;
+; Check that followup attributes are set in the new loops.
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define void @followup(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j], indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner  ; inner back edge has no loop metadata in the input
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !0  ; outer-loop back edge carries loop ID !0
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !1, !2, !3, !4, !6}  ; self-referential loop ID listing every property below (no !5 is defined)
+!1 = !{!"llvm.loop.unroll_and_jam.enable"}  ; requests unroll-and-jam for the outer loop
+!2 = !{!"llvm.loop.unroll_and_jam.followup_outer", !{!"FollowupOuter"}}  ; marker expected on the transformed outer loop
+!3 = !{!"llvm.loop.unroll_and_jam.followup_inner", !{!"FollowupInner"}}  ; marker expected on the transformed inner loop
+!4 = !{!"llvm.loop.unroll_and_jam.followup_all", !{!"FollowupAll"}}  ; marker expected on every loop the pass produces
+!6 = !{!"llvm.loop.unroll_and_jam.followup_remainder_inner", !{!"FollowupRemainderInner"}}  ; marker expected on the remainder's inner loops
+
+
+; CHECK: br i1 %exitcond.3, label %for.latch, label %for.inner, !llvm.loop ![[LOOP_INNER:[0-9]+]]
+; CHECK: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit, label %for.outer, !llvm.loop ![[LOOP_OUTER:[0-9]+]]
+; CHECK: br i1 %exitcond.epil, label %for.latch.epil, label %for.inner.epil, !llvm.loop ![[LOOP_REMAINDER_INNER:[0-9]+]]
+; CHECK: br i1 %exitcond.epil.1, label %for.latch.epil.1, label %for.inner.epil.1, !llvm.loop ![[LOOP_REMAINDER_INNER]]
+; CHECK: br i1 %exitcond.epil.2, label %for.latch.epil.2, label %for.inner.epil.2, !llvm.loop ![[LOOP_REMAINDER_INNER]]
+
+; CHECK: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[FOLLOWUP_ALL:[0-9]+]], ![[FOLLOWUP_INNER:[0-9]+]]}
+; CHECK: ![[FOLLOWUP_ALL]] = !{!"FollowupAll"}
+; CHECK: ![[FOLLOWUP_INNER]] = !{!"FollowupInner"}
+; CHECK: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[FOLLOWUP_ALL]], ![[FOLLOWUP_OUTER:[0-9]+]]}
+; CHECK: ![[FOLLOWUP_OUTER]] = !{!"FollowupOuter"}
+; CHECK: ![[LOOP_REMAINDER_INNER]] = distinct !{![[LOOP_REMAINDER_INNER]], ![[FOLLOWUP_ALL]], ![[FOLLOWUP_REMAINDER_INNER:[0-9]+]]}
+; CHECK: ![[FOLLOWUP_REMAINDER_INNER]] = !{!"FollowupRemainderInner"}

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,144 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: function
+; The explicit metadata here should force this loop to be unrolled and jammed by a factor of 4 (hence the %.pre60.3)
+; CHECK: %.pre = phi i8 [ %.pre60.3, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader.new ]
+; CHECK: %indvars.iv.3 = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next.3, %for.body4.us ]
+define void @function(i8* noalias nocapture %dst, i32 %dst_stride, i8* noalias nocapture readonly %src, i32 %src_stride, i32 %A, i32 %B, i32 %C, i32 %D, i32 %width, i32 %height) {
+entry:
+  %idxprom = sext i32 %src_stride to i64
+  %cmp52 = icmp sgt i32 %height, 0
+  br i1 %cmp52, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup  ; nothing to do unless height > 0
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %cmp249 = icmp sgt i32 %width, 0
+  %idx.ext = sext i32 %dst_stride to i64
+  br i1 %cmp249, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup  ; nothing to do unless width > 0
+
+for.cond1.preheader.us.preheader:                 ; preds = %for.cond1.preheader.lr.ph
+  %.pre.pre = load i8, i8* %src, align 1  ; first byte of the first source row
+  %wide.trip.count = zext i32 %width to i64
+  br label %for.cond1.preheader.us
+
+for.cond1.preheader.us:                           ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.us.preheader
+  %.pre = phi i8 [ %.pre60, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader ]  ; first byte of the current source row
+  %srcp.056.us.pn = phi i8* [ %srcp.056.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %src, %for.cond1.preheader.us.preheader ]  ; pointer to the current source row
+  %y.055.us = phi i32 [ %inc30.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]  ; outer (row) counter
+  %dst.addr.054.us = phi i8* [ %add.ptr.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %dst, %for.cond1.preheader.us.preheader ]  ; pointer to the current destination row
+  %srcp.056.us = getelementptr inbounds i8, i8* %srcp.056.us.pn, i64 %idxprom  ; next source row = current row + src_stride
+  %.pre60 = load i8, i8* %srcp.056.us, align 1  ; first byte of the next source row
+  br label %for.body4.us
+
+for.body4.us:                                     ; preds = %for.body4.us, %for.cond1.preheader.us
+  %0 = phi i8 [ %.pre60, %for.cond1.preheader.us ], [ %3, %for.body4.us ]  ; next-row byte at this column, carried over from the previous iteration
+  %1 = phi i8 [ %.pre, %for.cond1.preheader.us ], [ %2, %for.body4.us ]  ; current-row byte at this column, carried over from the previous iteration
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next, %for.body4.us ]  ; inner (column) counter
+  %conv.us = zext i8 %1 to i32
+  %mul.us = mul nsw i32 %conv.us, %A
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %arrayidx8.us = getelementptr inbounds i8, i8* %srcp.056.us.pn, i64 %indvars.iv.next
+  %2 = load i8, i8* %arrayidx8.us, align 1  ; current-row byte at column + 1
+  %conv9.us = zext i8 %2 to i32
+  %mul10.us = mul nsw i32 %conv9.us, %B
+  %conv14.us = zext i8 %0 to i32
+  %mul15.us = mul nsw i32 %conv14.us, %C
+  %arrayidx19.us = getelementptr inbounds i8, i8* %srcp.056.us, i64 %indvars.iv.next
+  %3 = load i8, i8* %arrayidx19.us, align 1  ; next-row byte at column + 1
+  %conv20.us = zext i8 %3 to i32
+  %mul21.us = mul nsw i32 %conv20.us, %D
+  %add11.us = add i32 %mul.us, 32
+  %add16.us = add i32 %add11.us, %mul10.us
+  %add22.us = add i32 %add16.us, %mul15.us
+  %add23.us = add i32 %add22.us, %mul21.us
+  %4 = lshr i32 %add23.us, 6  ; (A*%1 + B*%2 + C*%0 + D*%3 + 32) >> 6
+  %conv24.us = trunc i32 %4 to i8
+  %arrayidx26.us = getelementptr inbounds i8, i8* %dst.addr.054.us, i64 %indvars.iv
+  store i8 %conv24.us, i8* %arrayidx26.us, align 1  ; dst row[column] = low 8 bits of the weighted sum
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us
+
+for.cond1.for.cond.cleanup3_crit_edge.us:         ; preds = %for.body4.us
+  %add.ptr.us = getelementptr inbounds i8, i8* %dst.addr.054.us, i64 %idx.ext  ; advance the destination by dst_stride
+  %inc30.us = add nuw nsw i32 %y.055.us, 1
+  %exitcond58 = icmp eq i32 %inc30.us, %height
+  br i1 %exitcond58, label %for.cond.cleanup, label %for.cond1.preheader.us, !llvm.loop !5  ; outer-loop back edge carries loop ID !5
+
+for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.lr.ph, %entry
+  ret void
+}
+
+; CHECK-LABEL: function2
+; The explicit metadata here should force this loop to be unrolled and jammed, but
+; the count is left to the thresholds. In this case it is 2 (hence %.pre60.1).
+; CHECK: %.pre = phi i8 [ %.pre60.1, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader.new ]
+; CHECK: %indvars.iv.1 = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next.1, %for.body4.us ]
+define void @function2(i8* noalias nocapture %dst, i32 %dst_stride, i8* noalias nocapture readonly %src, i32 %src_stride, i32 %A, i32 %B, i32 %C, i32 %D, i32 %width, i32 %height) {
+entry:
+  %idxprom = sext i32 %src_stride to i64
+  %cmp52 = icmp sgt i32 %height, 0
+  br i1 %cmp52, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup  ; nothing to do unless height > 0
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %cmp249 = icmp sgt i32 %width, 0
+  %idx.ext = sext i32 %dst_stride to i64
+  br i1 %cmp249, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup  ; nothing to do unless width > 0
+
+for.cond1.preheader.us.preheader:                 ; preds = %for.cond1.preheader.lr.ph
+  %.pre.pre = load i8, i8* %src, align 1  ; first byte of the first source row
+  %wide.trip.count = zext i32 %width to i64
+  br label %for.cond1.preheader.us
+
+for.cond1.preheader.us:                           ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.us.preheader
+  %.pre = phi i8 [ %.pre60, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader ]  ; first byte of the current source row
+  %srcp.056.us.pn = phi i8* [ %srcp.056.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %src, %for.cond1.preheader.us.preheader ]  ; pointer to the current source row
+  %y.055.us = phi i32 [ %inc30.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]  ; outer (row) counter
+  %dst.addr.054.us = phi i8* [ %add.ptr.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %dst, %for.cond1.preheader.us.preheader ]  ; pointer to the current destination row
+  %srcp.056.us = getelementptr inbounds i8, i8* %srcp.056.us.pn, i64 %idxprom  ; next source row = current row + src_stride
+  %.pre60 = load i8, i8* %srcp.056.us, align 1  ; first byte of the next source row
+  br label %for.body4.us
+
+for.body4.us:                                     ; preds = %for.body4.us, %for.cond1.preheader.us
+  %0 = phi i8 [ %.pre60, %for.cond1.preheader.us ], [ %3, %for.body4.us ]  ; next-row byte at this column, carried over from the previous iteration
+  %1 = phi i8 [ %.pre, %for.cond1.preheader.us ], [ %2, %for.body4.us ]  ; current-row byte at this column, carried over from the previous iteration
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next, %for.body4.us ]  ; inner (column) counter
+  %conv.us = zext i8 %1 to i32
+  %mul.us = mul nsw i32 %conv.us, %A
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %arrayidx8.us = getelementptr inbounds i8, i8* %srcp.056.us.pn, i64 %indvars.iv.next
+  %2 = load i8, i8* %arrayidx8.us, align 1  ; current-row byte at column + 1
+  %conv9.us = zext i8 %2 to i32
+  %mul10.us = mul nsw i32 %conv9.us, %B
+  %conv14.us = zext i8 %0 to i32
+  %mul15.us = mul nsw i32 %conv14.us, %C
+  %arrayidx19.us = getelementptr inbounds i8, i8* %srcp.056.us, i64 %indvars.iv.next
+  %3 = load i8, i8* %arrayidx19.us, align 1  ; next-row byte at column + 1
+  %conv20.us = zext i8 %3 to i32
+  %mul21.us = mul nsw i32 %conv20.us, %D
+  %add11.us = add i32 %mul.us, 32
+  %add16.us = add i32 %add11.us, %mul10.us
+  %add22.us = add i32 %add16.us, %mul15.us
+  %add23.us = add i32 %add22.us, %mul21.us
+  %4 = lshr i32 %add23.us, 6  ; (A*%1 + B*%2 + C*%0 + D*%3 + 32) >> 6
+  %conv24.us = trunc i32 %4 to i8
+  %arrayidx26.us = getelementptr inbounds i8, i8* %dst.addr.054.us, i64 %indvars.iv
+  store i8 %conv24.us, i8* %arrayidx26.us, align 1  ; dst row[column] = low 8 bits of the weighted sum
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us
+
+for.cond1.for.cond.cleanup3_crit_edge.us:         ; preds = %for.body4.us
+  %add.ptr.us = getelementptr inbounds i8, i8* %dst.addr.054.us, i64 %idx.ext  ; advance the destination by dst_stride
+  %inc30.us = add nuw nsw i32 %y.055.us, 1
+  %exitcond58 = icmp eq i32 %inc30.us, %height
+  br i1 %exitcond58, label %for.cond.cleanup, label %for.cond1.preheader.us, !llvm.loop !7  ; outer-loop back edge carries loop ID !7
+
+for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.lr.ph, %entry
+  ret void
+}
+
+!5 = distinct !{!5, !6}  ; loop ID attached to the outer loop of @function
+!6 = !{!"llvm.loop.unroll_and_jam.count", i32 4}  ; explicit unroll-and-jam count of 4
+!7 = distinct !{!7, !8}  ; loop ID attached to the outer loop of @function2
+!8 = !{!"llvm.loop.unroll_and_jam.enable"}  ; unroll-and-jam requested with no explicit count

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/pragma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/pragma.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/pragma.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/pragma.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,319 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime < %s -S | FileCheck %s
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -unroll-and-jam-threshold=15 < %s -S | FileCheck %s --check-prefix=CHECK-LOWTHRES
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: test1
+; Basic check that these loops are by default UnJ'd
+define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+; CHECK: %i.us = phi i32 [ %add8.us.{{[1-9]*}}, %for.latch ], [ 0, %for.outer.preheader.new ]
+; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j], indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer  ; no loop metadata: the pass defaults apply
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: nounroll_and_jam
+; #pragma nounroll_and_jam
+define void @nounroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j], indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !1  ; !1 carries llvm.loop.unroll_and_jam.disable
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: unroll_and_jam_count
+; #pragma unroll_and_jam(8)
+define void @unroll_and_jam_count(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+; CHECK: %i.us = phi i32 [ %add8.us.7, %for.latch ], [ 0, %for.outer.preheader.new ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j], indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !3  ; !3 carries llvm.loop.unroll_and_jam.count = 8
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: @unroll_and_jam(
+; #pragma unroll_and_jam
+define void @unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+; CHECK: %i.us = phi i32 [ %add8.us.{{[1-9]*}}, %for.latch ], [ 0, %for.outer.preheader.new ]
+; CHECK-LOWTHRES: %i.us = phi i32 [ %add8.us.{{[1-9]*}}, %for.latch ], [ 0, %for.outer.preheader.new ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j], indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !5  ; !5 carries llvm.loop.unroll_and_jam.enable
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: @nounroll(
+; #pragma nounroll (which we take to mean disable unroll and jam too)
+define void @nounroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j], indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !7  ; !7 carries llvm.loop.unroll.disable
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: @unroll(
+; #pragma unroll (which we take to mean disable unroll and jam)
+define void @unroll(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+; CHECK: %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j], indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !9  ; !9 carries llvm.loop.unroll.enable
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: nounroll_plus_unroll_and_jam
+; #pragma clang loop nounroll, unroll_and_jam (which we take to mean do unroll_and_jam)
+define void @nounroll_plus_unroll_and_jam(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+; CHECK: %i.us = phi i32 [ %add8.us.{{[1-9]*}}, %for.latch ], [ 0, %for.outer.preheader.new ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]  ; inner induction variable j
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4  ; B[j], indexed by j alone
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4  ; A[i] = sum
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !11  ; !11 combines unroll.disable with unroll_and_jam.enable
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+!1 = distinct !{!1, !2}  ; loop ID used by @nounroll_and_jam
+!2 = distinct !{!"llvm.loop.unroll_and_jam.disable"}
+!3 = distinct !{!3, !4}  ; loop ID used by @unroll_and_jam_count
+!4 = distinct !{!"llvm.loop.unroll_and_jam.count", i32 8}
+!5 = distinct !{!5, !6}  ; loop ID used by @unroll_and_jam
+!6 = distinct !{!"llvm.loop.unroll_and_jam.enable"}
+!7 = distinct !{!7, !8}  ; loop ID used by @nounroll
+!8 = distinct !{!"llvm.loop.unroll.disable"}
+!9 = distinct !{!9, !10}  ; loop ID used by @unroll
+!10 = distinct !{!"llvm.loop.unroll.enable"}
+!11 = distinct !{!11, !8, !6}  ; loop ID used by @nounroll_plus_unroll_and_jam: unroll.disable plus unroll_and_jam.enable

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/unprofitable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/unprofitable.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/unprofitable.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/unprofitable.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,217 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -pass-remarks=loop-unroll < %s -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8m.main-arm-none-eabi"
+
+;; Common check for all tests. None should be unrolled and jammed, because doing so is not considered profitable.
+; CHECK-NOT: remark: {{.*}} unroll and jammed
+
+
+; CHECK-LABEL: unprof1
+; Multiple inner loop blocks
+define void @unprof1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %addinc, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner2 ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %addinc, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:  ; first of the two inner-loop blocks
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner2 ]  ; inner induction variable j
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner2 ]  ; running sum, restarts at 0 for every i
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4  ; B[j], indexed by j alone
+  %add = add i32 %0, %sum1
+  br label %for.inner2
+
+for.inner2:  ; second inner-loop block: holds the increment, exit test and back edge
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner2 ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4  ; A[i] = sum
+  %addinc = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %addinc, %I
+  br i1 %exitcond25, label %for.loopexit, label %for.outer
+
+for.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: unprof2
+; Constant inner loop count
+define void @unprof2(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %addinc, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < 10) sum += B[j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %addinc, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]  ; inner induction variable j
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4  ; B[j], indexed by j alone
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, 10  ; inner loop exits at the constant 10, not at %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4  ; A[i] = sum
+  %addinc = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %addinc, %I
+  br i1 %exitcond25, label %for.loopexit, label %for.outer
+
+for.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: unprof3
+; Complex inner loop
+define void @unprof3(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %addinc, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[j]; A[i] = sum; } with a padded inner body
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %addinc, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]  ; inner induction variable j
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]  ; running sum, restarts at 0 for every i
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4  ; B[j], indexed by j alone
+  %add = add i32 %0, %sum1  ; the only add whose result is used
+  %add0 = add i32 %0, %sum1  ; %add0 through %add29 repeat %add and are never used; they only enlarge the inner body
+  %add1 = add i32 %0, %sum1
+  %add2 = add i32 %0, %sum1
+  %add3 = add i32 %0, %sum1
+  %add4 = add i32 %0, %sum1
+  %add5 = add i32 %0, %sum1
+  %add6 = add i32 %0, %sum1
+  %add7 = add i32 %0, %sum1
+  %add8 = add i32 %0, %sum1
+  %add9 = add i32 %0, %sum1
+  %add10 = add i32 %0, %sum1
+  %add11 = add i32 %0, %sum1
+  %add12 = add i32 %0, %sum1
+  %add13 = add i32 %0, %sum1
+  %add14 = add i32 %0, %sum1
+  %add15 = add i32 %0, %sum1
+  %add16 = add i32 %0, %sum1
+  %add17 = add i32 %0, %sum1
+  %add18 = add i32 %0, %sum1
+  %add19 = add i32 %0, %sum1
+  %add20 = add i32 %0, %sum1
+  %add21 = add i32 %0, %sum1
+  %add22 = add i32 %0, %sum1
+  %add23 = add i32 %0, %sum1
+  %add24 = add i32 %0, %sum1
+  %add25 = add i32 %0, %sum1
+  %add26 = add i32 %0, %sum1
+  %add27 = add i32 %0, %sum1
+  %add28 = add i32 %0, %sum1
+  %add29 = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4  ; A[i] = sum
+  %addinc = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %addinc, %I
+  br i1 %exitcond25, label %for.loopexit, label %for.outer
+
+for.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: unprof4
+; No loop invariant loads
+define void @unprof4(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {
+; CHECK: %i = phi i32 [ %addinc, %for.latch ], [ 0, %for.outer.preheader ]
+; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+entry:  ; loop nest: for (i < I) { sum = 0; for (j < J) sum += B[i + j]; A[i] = sum; }
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122  ; enter the nest only if both I and J are non-zero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i = phi i32 [ %addinc, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable i
+  br label %for.inner
+
+for.inner:
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]  ; inner induction variable j
+  %sum1 = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]  ; running sum, restarts at 0 for every i
+  %j2 = add i32 %j, %i  ; load index depends on both i and j
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j2
+  %0 = load i32, i32* %arrayidx, align 4  ; B[i + j]: the address varies with the outer loop
+  %add = add i32 %0, %sum1
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.lcssa = phi i32 [ %add, %for.inner ]
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4  ; A[i] = sum
+  %addinc = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %addinc, %I
+  br i1 %exitcond25, label %for.loopexit, label %for.outer
+
+for.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,735 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -basicaa -tbaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: test1
+; Tests for(i) { sum = 0; for(j) sum += B[j]; A[i] = sum; }
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[J:%.*]], 0
+; CHECK-NEXT:    [[CMPJ:%.*]] = icmp ne i32 [[I:%.*]], 0
+; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP]], [[CMPJ]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.outer.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[I]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[I]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]]
+; CHECK:       for.outer.preheader.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
+; CHECK-NEXT:    br label [[FOR_OUTER:%.*]]
+; CHECK:       for.outer:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT:    [[ADD8:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[ADD8_1:%.*]] = add nuw nsw i32 [[ADD8]], 1
+; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[ADD8_2:%.*]] = add nuw nsw i32 [[ADD8_1]], 1
+; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[ADD8_3]] = add nuw i32 [[ADD8_2]], 1
+; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    br label [[FOR_INNER:%.*]]
+; CHECK:       for.inner:
+; CHECK-NEXT:    [[J_0:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[J_0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD]] = add i32 [[TMP2]], [[SUM]]
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[J_0]], 1
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD_1]] = add i32 [[TMP3]], [[SUM_1]]
+; CHECK-NEXT:    [[INC_1]] = add nuw i32 [[J_1]], 1
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD_2]] = add i32 [[TMP4]], [[SUM_2]]
+; CHECK-NEXT:    [[INC_2]] = add nuw i32 [[J_2]], 1
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD_3]] = add i32 [[TMP5]], [[SUM_3]]
+; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[J_3]], 1
+; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[J]]
+; CHECK-NEXT:    br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]]
+; CHECK:       for.latch:
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I]]
+; CHECK-NEXT:    store i32 [[ADD_LCSSA]], i32* [[ARRAYIDX6]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8]]
+; CHECK-NEXT:    store i32 [[ADD_LCSSA_1]], i32* [[ARRAYIDX6_1]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_1]]
+; CHECK-NEXT:    store i32 [[ADD_LCSSA_2]], i32* [[ARRAYIDX6_2]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_2]]
+; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], i32* [[ARRAYIDX6_3]], align 4, !tbaa !0
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop !4
+; CHECK:       for.end.loopexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], [[FOR_LATCH]] ]
+; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; CHECK:       for.end.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK:       for.outer.epil.preheader:
+; CHECK-NEXT:    br label [[FOR_OUTER_EPIL:%.*]]
+; CHECK:       for.outer.epil:
+; CHECK-NEXT:    br label [[FOR_INNER_EPIL:%.*]]
+; CHECK:       for.inner.epil:
+; CHECK-NEXT:    [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT:    [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_EPIL]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD_EPIL]] = add i32 [[TMP6]], [[SUM_EPIL]]
+; CHECK-NEXT:    [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1
+; CHECK-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[J]]
+; CHECK-NEXT:    br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]]
+; CHECK:       for.latch.epil:
+; CHECK-NEXT:    [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ]
+; CHECK-NEXT:    [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_UNR]]
+; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL]], i32* [[ARRAYIDX6_EPIL]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD8_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
+; CHECK:       for.end.loopexit.epilog-lcssa:
+; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+; CHECK:       for.outer.epil.1:
+; CHECK-NEXT:    br label [[FOR_INNER_EPIL_1:%.*]]
+; CHECK:       for.inner.epil.1:
+; CHECK-NEXT:    [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_EPIL_1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD_EPIL_1]] = add i32 [[TMP7]], [[SUM_EPIL_1]]
+; CHECK-NEXT:    [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
+; CHECK-NEXT:    [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[J]]
+; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]]
+; CHECK:       for.latch.epil.1:
+; CHECK-NEXT:    [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL]]
+; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_1]], i32* [[ARRAYIDX6_EPIL_1]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD8_EPIL_1:%.*]] = add nuw i32 [[ADD8_EPIL]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.outer.epil.2:
+; CHECK-NEXT:    br label [[FOR_INNER_EPIL_2:%.*]]
+; CHECK:       for.inner.epil.2:
+; CHECK-NEXT:    [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT:    [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_EPIL_2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD_EPIL_2]] = add i32 [[TMP8]], [[SUM_EPIL_2]]
+; CHECK-NEXT:    [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
+; CHECK-NEXT:    [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[J]]
+; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]]
+; CHECK:       for.latch.epil.2:
+; CHECK-NEXT:    [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT:    [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL_1]]
+; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_2]], i32* [[ARRAYIDX6_EPIL_2]], align 4, !tbaa !0
+; CHECK-NEXT:    [[ADD8_EPIL_2:%.*]] = add nuw i32 [[ADD8_EPIL_1]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB_2:%.*]] = sub i32 [[EPIL_ITER_SUB_1]], 1
+; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {  ; for(i) { sum = 0; for(j) sum += B[j]; A[i] = sum; }
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmpJ = icmp ne i32 %I, 0  ; note: despite the name, this compares %I
+  %or.cond = and i1 %cmp, %cmpJ  ; true only when both %I and %J are nonzero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end  ; otherwise the loop nest is skipped
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:  ; outer loop header
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable, starts at 0
+  br label %for.inner
+
+for.inner:  ; single-block inner loop
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]  ; inner induction variable
+  %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]  ; running sum, reset to 0 per outer iteration
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j  ; &B[j], independent of %i
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !5
+  %add = add i32 %0, %sum
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner  ; leave the inner loop once %inc reaches %J
+
+for.latch:  ; outer loop latch
+  %add.lcssa = phi i32 [ %add, %for.inner ]  ; single-entry phi carrying the final inner-loop sum
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5  ; A[i] = sum
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer  ; leave the outer loop once %add8 reaches %I
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: test2
+; Tests for(i) { sum = A[i]; for(j) sum += B[j]; A[i] = sum; }
+; A[i] load/store dependency should not block unroll-and-jam
+; CHECK: for.outer:
+; CHECK:   %i = phi i32 [ %add9.3, %for.latch ], [ 0, %for.outer.preheader.new ]
+; CHECK:   %niter = phi i32 [ %unroll_iter, %for.outer.preheader.new ], [ %niter.nsub.3, %for.latch ]
+; CHECK:   br label %for.inner
+; CHECK: for.inner:
+; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+; CHECK:   %sum = phi i32 [ %2, %for.outer ], [ %add, %for.inner ]
+; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ]
+; CHECK:   %sum.1 = phi i32 [ %3, %for.outer ], [ %add.1, %for.inner ]
+; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ]
+; CHECK:   %sum.2 = phi i32 [ %4, %for.outer ], [ %add.2, %for.inner ]
+; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ]
+; CHECK:   %sum.3 = phi i32 [ %5, %for.outer ], [ %add.3, %for.inner ]
+; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
+; CHECK: for.latch:
+; CHECK:   %add.lcssa = phi i32 [ %add, %for.inner ]
+; CHECK:   %add.lcssa.1 = phi i32 [ %add.1, %for.inner ]
+; CHECK:   %add.lcssa.2 = phi i32 [ %add.2, %for.inner ]
+; CHECK:   %add.lcssa.3 = phi i32 [ %add.3, %for.inner ]
+; CHECK:   br i1 %niter.ncmp.3, label %for.end10.loopexit.unr-lcssa.loopexit, label %for.outer
+; CHECK: for.end10.loopexit.unr-lcssa.loopexit:
+define void @test2(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {  ; for(i) { sum = A[i]; for(j) sum += B[j]; A[i] = sum; }
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp125 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp125  ; true only when both %I and %J are nonzero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end10  ; otherwise the loop nest is skipped
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:  ; outer loop header; reads A[i] before the inner loop runs
+  %i = phi i32 [ %add9, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable, starts at 0
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i  ; &A[i], reused by the store in for.latch
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !5  ; initial value of the sum
+  br label %for.inner
+
+for.inner:  ; single-block inner loop
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]  ; inner induction variable
+  %sum = phi i32 [ %0, %for.outer ], [ %add, %for.inner ]  ; running sum, seeded with the A[i] load
+  %arrayidx6 = getelementptr inbounds i32, i32* %B, i32 %j
+  %1 = load i32, i32* %arrayidx6, align 4, !tbaa !5
+  %add = add i32 %1, %sum
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner  ; leave the inner loop once %inc reaches %J
+
+for.latch:  ; outer loop latch
+  %add.lcssa = phi i32 [ %add, %for.inner ]  ; single-entry phi carrying the final inner-loop sum
+  store i32 %add.lcssa, i32* %arrayidx, align 4, !tbaa !5  ; writes back to the same address loaded in for.outer
+  %add9 = add nuw i32 %i, 1
+  %exitcond28 = icmp eq i32 %add9, %I
+  br i1 %exitcond28, label %for.end10.loopexit, label %for.outer  ; leave the outer loop once %add9 reaches %I
+
+for.end10.loopexit:
+  br label %for.end10
+
+for.end10:
+  ret void
+}
+
+
+; CHECK-LABEL: test3
+; Tests Complete unroll-and-jam of the outer loop
+; CHECK: for.outer:
+; CHECK:   br label %for.inner
+; CHECK: for.inner:
+; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ]
+; CHECK:   %sum.1 = phi i32 [ 0, %for.outer ], [ %add.1, %for.inner ]
+; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ]
+; CHECK:   %sum.2 = phi i32 [ 0, %for.outer ], [ %add.2, %for.inner ]
+; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ]
+; CHECK:   %sum.3 = phi i32 [ 0, %for.outer ], [ %add.3, %for.inner ]
+; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
+; CHECK: for.latch:
+; CHECK:   %add.lcssa = phi i32 [ %add, %for.inner ]
+; CHECK:   %add.lcssa.1 = phi i32 [ %add.1, %for.inner ]
+; CHECK:   %add.lcssa.2 = phi i32 [ %add.2, %for.inner ]
+; CHECK:   %add.lcssa.3 = phi i32 [ %add.3, %for.inner ]
+; CHECK:   br label %for.end
+; CHECK: for.end:
+define void @test3(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {  ; outer loop has a constant exit bound of 4; %I is not used
+entry:
+  %cmp = icmp eq i32 %J, 0
+  br i1 %cmp, label %for.end, label %for.preheader  ; skip the loop nest when %J is 0
+
+for.preheader:
+  br label %for.outer
+
+for.outer:  ; outer loop header
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.preheader ]  ; outer induction variable, starts at 0
+  br label %for.inner
+
+for.inner:  ; single-block inner loop
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]  ; inner induction variable
+  %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]  ; accumulator, reset to 0 per outer iteration
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !5
+  %sub = add i32 %sum, 10  ; note: value names do not match opcodes (%sub is an add)
+  %add = sub i32 %sub, %0  ; new accumulator = (sum + 10) - B[j]
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner  ; leave the inner loop once %inc reaches %J
+
+for.latch:  ; outer loop latch; uses %add directly, with no LCSSA phi in the input
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add, i32* %arrayidx6, align 4, !tbaa !5  ; A[i] = accumulator
+  %add8 = add nuw nsw i32 %i, 1
+  %exitcond23 = icmp eq i32 %add8, 4  ; constant bound: the outer loop exits when %add8 == 4
+  br i1 %exitcond23, label %for.end, label %for.outer
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: test4
+; Tests Complete unroll-and-jam with a trip count of 1
+; CHECK: for.outer:
+; CHECK:   br label %for.inner
+; CHECK: for.inner:
+; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+; CHECK:   br i1 %exitcond, label %for.latch, label %for.inner
+; CHECK: for.latch:
+; CHECK:   %add.lcssa = phi i32 [ %add, %for.inner ]
+; CHECK:   br label %for.end
+; CHECK: for.end:
+define void @test4(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {  ; same body as @test3 but the outer exit bound is 1; %I is not used
+entry:
+  %cmp = icmp eq i32 %J, 0
+  br i1 %cmp, label %for.end, label %for.preheader  ; skip the loop nest when %J is 0
+
+for.preheader:
+  br label %for.outer
+
+for.outer:  ; outer loop header
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.preheader ]  ; outer induction variable, starts at 0
+  br label %for.inner
+
+for.inner:  ; single-block inner loop
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]  ; inner induction variable
+  %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]  ; accumulator, reset to 0 per outer iteration
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !5
+  %sub = add i32 %sum, 10  ; note: value names do not match opcodes (%sub is an add)
+  %add = sub i32 %sub, %0  ; new accumulator = (sum + 10) - B[j]
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner  ; leave the inner loop once %inc reaches %J
+
+for.latch:  ; outer loop latch; uses %add directly, with no LCSSA phi in the input
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add, i32* %arrayidx6, align 4, !tbaa !5  ; A[i] = accumulator
+  %add8 = add nuw nsw i32 %i, 1
+  %exitcond23 = icmp eq i32 %add8, 1  ; %i starts at 0, so this is already true on the first pass
+  br i1 %exitcond23, label %for.end, label %for.outer
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: test5
+; Multiple SubLoopBlocks
+; CHECK: for.outer:
+; CHECK:   br label %for.inner
+; CHECK: for.inner:
+; CHECK:   %inc8.sink15 = phi i32 [ 0, %for.outer ], [ %inc8, %for.inc.1 ]
+; CHECK:   %inc8.sink15.1 = phi i32 [ 0, %for.outer ], [ %inc8.1, %for.inc.1 ]
+; CHECK:   br label %for.inner2
+; CHECK: for.inner2:
+; CHECK:   br i1 %tobool, label %for.cond4, label %for.inc
+; CHECK: for.cond4:
+; CHECK:   br i1 %tobool.1, label %for.cond4a, label %for.inc
+; CHECK: for.cond4a:
+; CHECK:   br label %for.inc
+; CHECK: for.inc:
+; CHECK:   br i1 %tobool.11, label %for.cond4.1, label %for.inc.1
+; CHECK: for.latch:
+; CHECK:   br label %for.end
+; CHECK: for.end:
+; CHECK:   ret i32 0
+; CHECK: for.cond4.1:
+; CHECK:   br i1 %tobool.1.1, label %for.cond4a.1, label %for.inc.1
+; CHECK: for.cond4a.1:
+; CHECK:   br label %for.inc.1
+; CHECK: for.inc.1:
+; CHECK:   br i1 %exitcond.1, label %for.latch, label %for.inner
+@a = hidden global [1 x i32] zeroinitializer, align 4  ; one-element array read by the loads in @test5
+define i32 @test5() #0 {  ; loop nest whose inner loop body spans several basic blocks; always returns 0
+entry:
+  br label %for.outer
+
+for.outer:  ; outer loop header
+  %.sink16 = phi i32 [ 0, %entry ], [ %add, %for.latch ]  ; outer counter, starts at 0
+  br label %for.inner
+
+for.inner:  ; inner loop header
+  %inc8.sink15 = phi i32 [ 0, %for.outer ], [ %inc8, %for.inc ]  ; inner counter, starts at 0
+  br label %for.inner2
+
+for.inner2:
+  %l1 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0), align 4
+  %tobool = icmp eq i32 %l1, 0
+  br i1 %tobool, label %for.cond4, label %for.inc  ; first conditional inside the inner loop
+
+for.cond4:
+  %l0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 1, i32 0), align 4
+  %tobool.1 = icmp eq i32 %l0, 0
+  br i1 %tobool.1, label %for.cond4a, label %for.inc  ; second conditional inside the inner loop
+
+for.cond4a:
+  br label %for.inc
+
+for.inc:  ; inner loop latch; join point of the three paths above
+  %l2 = phi i32 [ 0, %for.inner2 ], [ 1, %for.cond4 ], [ 2, %for.cond4a ]  ; records which path was taken
+  %inc8 = add nuw nsw i32 %inc8.sink15, 1
+  %exitcond = icmp eq i32 %inc8, 3  ; the inner loop exits when %inc8 == 3
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:  ; outer loop latch
+  %.lcssa = phi i32 [ %l2, %for.inc ]
+  %conv11 = and i32 %.sink16, 255
+  %add = add nuw nsw i32 %conv11, 4  ; next counter = (counter & 255) + 4, i.e. 0 -> 4 -> 8
+  %cmp = icmp eq i32 %add, 8  ; true on the second pass, so the outer loop runs twice
+  br i1 %cmp, label %for.end, label %for.outer
+
+for.end:
+  %.lcssa.lcssa = phi i32 [ %.lcssa, %for.latch ]  ; not used by the return below
+  ret i32 0
+}
+
+
+; CHECK-LABEL: test6
+; Test odd uses of phi nodes
+; CHECK: for.outer:
+; CHECK:   br label %for.inner
+; CHECK: for.inner:
+; CHECK:   br i1 %exitcond.3, label %for.inner, label %for.latch
+; CHECK: for.latch:
+; CHECK:   br label %for.end
+; CHECK: for.end:
+; CHECK:   ret i32 0
+@f = hidden global i32 0, align 4  ; loaded once in the entry block of @test6
+define i32 @test6() #0 {  ; loop nest whose phis take constant incoming values on the back edges; always returns 0
+entry:
+  %f.promoted10 = load i32, i32* @f, align 4, !tbaa !5
+  br label %for.outer
+
+for.outer:  ; outer loop header
+  %p0 = phi i32 [ %f.promoted10, %entry ], [ 2, %for.latch ]  ; loaded value on entry, constant 2 on the back edge
+  %inc5.sink9 = phi i32 [ 2, %entry ], [ %inc5, %for.latch ]  ; outer counter, starts at 2
+  br label %for.inner
+
+for.inner:  ; single-block inner loop
+  %p1 = phi i32 [ %p0, %for.outer ], [ 2, %for.inner ]  ; %p0 on entry, constant 2 on the back edge
+  %inc.sink8 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]  ; inner counter, starts at 0
+  %inc = add nuw nsw i32 %inc.sink8, 1
+  %exitcond = icmp ne i32 %inc, 7
+  br i1 %exitcond, label %for.inner, label %for.latch  ; true edge is the back edge: loop while %inc != 7
+
+for.latch:  ; outer loop latch
+  %.lcssa = phi i32 [ %p1, %for.inner ]
+  %inc5 = add nuw nsw i32 %inc5.sink9, 1
+  %exitcond11 = icmp ne i32 %inc5, 7
+  br i1 %exitcond11, label %for.outer, label %for.end  ; true edge is the back edge: loop while %inc5 != 7
+
+for.end:
+  %.lcssa.lcssa = phi i32 [ %.lcssa, %for.latch ]  ; not used by the return below
+  %inc.lcssa.lcssa = phi i32 [ 7, %for.latch ]  ; exit phi with a constant incoming value; also unused
+  ret i32 0
+}
+
+
+; CHECK-LABEL: test7
+; Has a positive dependency between two stores. Still valid.
+; The negative dependency is in unroll-and-jam-disabled.ll
+; CHECK: for.outer:
+; CHECK:   %i = phi i32 [ %add.3, %for.latch ], [ 0, %for.preheader.new ]
+; CHECK:   %niter = phi i32 [ %unroll_iter, %for.preheader.new ], [ %niter.nsub.3, %for.latch ]
+; CHECK:   br label %for.inner
+; CHECK: for.latch:
+; CHECK:   %add9.lcssa = phi i32 [ %add9, %for.inner ]
+; CHECK:   %add9.lcssa.1 = phi i32 [ %add9.1, %for.inner ]
+; CHECK:   %add9.lcssa.2 = phi i32 [ %add9.2, %for.inner ]
+; CHECK:   %add9.lcssa.3 = phi i32 [ %add9.3, %for.inner ]
+; CHECK:   br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit, label %for.outer
+; CHECK: for.inner:
+; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ]
+; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ]
+; CHECK:   %sum.1 = phi i32 [ 0, %for.outer ], [ %add9.1, %for.inner ]
+; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %add10.1, %for.inner ]
+; CHECK:   %sum.2 = phi i32 [ 0, %for.outer ], [ %add9.2, %for.inner ]
+; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %add10.2, %for.inner ]
+; CHECK:   %sum.3 = phi i32 [ 0, %for.outer ], [ %add9.3, %for.inner ]
+; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %add10.3, %for.inner ]
+; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
+; CHECK: for.end.loopexit.unr-lcssa.loopexit:
+define void @test7(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {  ; for(i) { A[i] = 0; A[i+1] = 2; sum = 0; for(j) sum += B[j]; A[i] = sum; }
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp128 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp128, %cmp  ; true only when both %I and %J are nonzero
+  br i1 %or.cond, label %for.preheader, label %for.end  ; otherwise the loop nest is skipped
+
+for.preheader:
+  br label %for.outer
+
+for.outer:  ; outer loop header; performs two stores before the inner loop
+  %i = phi i32 [ %add, %for.latch ], [ 0, %for.preheader ]  ; outer induction variable, starts at 0
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i  ; &A[i], reused by the store in for.latch
+  store i32 0, i32* %arrayidx, align 4, !tbaa !5  ; A[i] = 0
+  %add = add nuw i32 %i, 1  ; next %i; also the index of the second store
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add
+  store i32 2, i32* %arrayidx2, align 4, !tbaa !5  ; A[i+1] = 2, the slot the next iteration writes as A[i]
+  br label %for.inner
+
+for.latch:  ; outer loop latch; appears before for.inner in the text
+  store i32 %add9, i32* %arrayidx, align 4, !tbaa !5  ; A[i] = sum; %add9 is used directly, with no LCSSA phi in the input
+  %exitcond30 = icmp eq i32 %add, %I
+  br i1 %exitcond30, label %for.end, label %for.outer  ; leave the outer loop once %add reaches %I
+
+for.inner:  ; single-block inner loop
+  %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ]  ; running sum, reset to 0 per outer iteration
+  %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ]  ; inner induction variable
+  %arrayidx7 = getelementptr inbounds i32, i32* %B, i32 %j
+  %l1 = load i32, i32* %arrayidx7, align 4, !tbaa !5
+  %add9 = add i32 %l1, %sum
+  %add10 = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %add10, %J
+  br i1 %exitcond, label %for.latch, label %for.inner  ; leave the inner loop once %add10 reaches %J
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: test8
+; Same as test7 with an extra outer loop nest
+; CHECK: for.outest:
+; CHECK:   br label %for.outer
+; CHECK: for.outer:
+; CHECK:   %i = phi i32 [ %add.3, %for.latch ], [ 0, %for.outest.new ]
+; CHECK:   %niter = phi i32 [ %unroll_iter, %for.outest.new ], [ %niter.nsub.3, %for.latch ]
+; CHECK:   br label %for.inner
+; CHECK: for.inner:
+; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ]
+; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ]
+; CHECK:   %sum.1 = phi i32 [ 0, %for.outer ], [ %add9.1, %for.inner ]
+; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %add10.1, %for.inner ]
+; CHECK:   %sum.2 = phi i32 [ 0, %for.outer ], [ %add9.2, %for.inner ]
+; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %add10.2, %for.inner ]
+; CHECK:   %sum.3 = phi i32 [ 0, %for.outer ], [ %add9.3, %for.inner ]
+; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %add10.3, %for.inner ]
+; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
+; CHECK: for.latch:
+; CHECK:   %add9.lcssa = phi i32 [ %add9, %for.inner ]
+; CHECK:   %add9.lcssa.1 = phi i32 [ %add9.1, %for.inner ]
+; CHECK:   %add9.lcssa.2 = phi i32 [ %add9.2, %for.inner ]
+; CHECK:   %add9.lcssa.3 = phi i32 [ %add9.3, %for.inner ]
+; CHECK:   br i1 %niter.ncmp.3, label %for.cleanup.unr-lcssa.loopexit, label %for.outer
+; CHECK: for.cleanup.epilog-lcssa:
+; CHECK:   br label %for.cleanup
+; CHECK: for.cleanup:
+; CHECK:   br i1 %exitcond41, label %for.end.loopexit, label %for.outest
+; CHECK: for.end.loopexit:
+; CHECK:   br label %for.end
+define void @test8(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {  ; the @test7 loop nest wrapped in a third loop (for.outest) with exit bound 5
+entry:
+  %cmp = icmp eq i32 %J, 0
+  %cmp336 = icmp eq i32 %I, 0
+  %or.cond = or i1 %cmp, %cmp336  ; true when either %I or %J is 0
+  br i1 %or.cond, label %for.end, label %for.preheader  ; in that case all loops are skipped
+
+for.preheader:
+  br label %for.outest
+
+for.outest:  ; outermost loop header
+  %x.038 = phi i32 [ %inc, %for.cleanup ], [ 0, %for.preheader ]  ; outermost counter, starts at 0
+  br label %for.outer
+
+for.outer:  ; middle loop header; performs two stores before the inner loop
+  %i = phi i32 [ %add, %for.latch ], [ 0, %for.outest ]  ; middle induction variable, restarts at 0 per outermost iteration
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i  ; &A[i], reused by the store in for.latch
+  store i32 0, i32* %arrayidx, align 4, !tbaa !5  ; A[i] = 0
+  %add = add nuw i32 %i, 1  ; next %i; also the index of the second store
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %add
+  store i32 2, i32* %arrayidx6, align 4, !tbaa !5  ; A[i+1] = 2
+  br label %for.inner
+
+for.inner:  ; single-block innermost loop
+  %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ]  ; running sum, reset to 0 per middle iteration
+  %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ]  ; innermost induction variable
+  %arrayidx11 = getelementptr inbounds i32, i32* %B, i32 %j
+  %l1 = load i32, i32* %arrayidx11, align 4, !tbaa !5
+  %add9 = add i32 %l1, %sum
+  %add10 = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %add10, %J
+  br i1 %exitcond, label %for.latch, label %for.inner  ; leave the innermost loop once %add10 reaches %J
+
+for.latch:  ; middle loop latch
+  store i32 %add9, i32* %arrayidx, align 4, !tbaa !5  ; A[i] = sum; %add9 is used directly, with no LCSSA phi in the input
+  %exitcond39 = icmp eq i32 %add, %I
+  br i1 %exitcond39, label %for.cleanup, label %for.outer  ; leave the middle loop once %add reaches %I
+
+for.cleanup:  ; outermost loop latch
+  %inc = add nuw nsw i32 %x.038, 1
+  %exitcond41 = icmp eq i32 %inc, 5  ; the outermost loop exits when %inc == 5
+  br i1 %exitcond41, label %for.end, label %for.outest
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: test9
+; Same as test1 with tbaa, not noalias
+; CHECK: for.outer:
+; CHECK:   %i = phi i32 [ %add8.3, %for.latch ], [ 0, %for.outer.preheader.new ]
+; CHECK:   %niter = phi i32 [ %unroll_iter, %for.outer.preheader.new ], [ %niter.nsub.3, %for.latch ]
+; CHECK:   br label %for.inner
+; CHECK: for.inner:
+; CHECK:   %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]
+; CHECK:   %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]
+; CHECK:   %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ]
+; CHECK:   %sum.1 = phi i32 [ 0, %for.outer ], [ %add.1, %for.inner ]
+; CHECK:   %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ]
+; CHECK:   %sum.2 = phi i32 [ 0, %for.outer ], [ %add.2, %for.inner ]
+; CHECK:   %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ]
+; CHECK:   %sum.3 = phi i32 [ 0, %for.outer ], [ %add.3, %for.inner ]
+; CHECK:   br i1 %exitcond.3, label %for.latch, label %for.inner
+; CHECK: for.latch:
+; CHECK:   %add.lcssa = phi i32 [ %add, %for.inner ]
+; CHECK:   %add.lcssa.1 = phi i32 [ %add.1, %for.inner ]
+; CHECK:   %add.lcssa.2 = phi i32 [ %add.2, %for.inner ]
+; CHECK:   %add.lcssa.3 = phi i32 [ %add.3, %for.inner ]
+; CHECK:   br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit, label %for.outer
+; CHECK: for.end.loopexit.unr-lcssa.loopexit:
+define void @test9(i32 %I, i32 %J, i32* nocapture %A, i16* nocapture readonly %B) #0 {  ; same shape as @test1, but %A/%B are not noalias and %B points to i16
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmpJ = icmp ne i32 %I, 0  ; note: despite the name, this compares %I
+  %or.cond = and i1 %cmp, %cmpJ  ; true only when both %I and %J are nonzero
+  br i1 %or.cond, label %for.outer.preheader, label %for.end  ; otherwise the loop nest is skipped
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:  ; outer loop header
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ]  ; outer induction variable, starts at 0
+  br label %for.inner
+
+for.inner:  ; single-block inner loop
+  %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ]  ; inner induction variable
+  %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ]  ; running sum, reset to 0 per outer iteration
+  %arrayidx = getelementptr inbounds i16, i16* %B, i32 %j
+  %0 = load i16, i16* %arrayidx, align 4, !tbaa !9  ; i16 load tagged !9; the i32 store below is tagged !5
+  %sext = sext i16 %0 to i32
+  %add = add i32 %sext, %sum
+  %inc = add nuw i32 %j, 1
+  %exitcond = icmp eq i32 %inc, %J
+  br i1 %exitcond, label %for.latch, label %for.inner  ; leave the inner loop once %inc reaches %J
+
+for.latch:  ; outer loop latch
+  %add.lcssa = phi i32 [ %add, %for.inner ]  ; single-entry phi carrying the final inner-loop sum
+  %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i
+  store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5  ; A[i] = sum
+  %add8 = add nuw i32 %i, 1
+  %exitcond25 = icmp eq i32 %add8, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer  ; leave the outer loop once %add8 reaches %I
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; CHECK-LABEL: test10
+; Be careful not to incorrectly update the exit phi nodes
+; CHECK: %dec.lcssa.lcssa.ph.ph = phi i64 [ 0, %for.inc24 ]
+%struct.a = type { i64 }
+@g = common global %struct.a zeroinitializer, align 8  ; field 0 is written by the store at the end of @test10
+@c = common global [1 x i8] zeroinitializer, align 1  ; element 0 is read in the entry block of @test10
+define signext i16 @test10(i32 %k) #0 {  ; loop nest with conditional blocks in the inner loop and exit phis in for.end26; always returns 0
+entry:
+  %0 = load i8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @c, i64 0, i64 0), align 1
+  %tobool9 = icmp eq i8 %0, 0  ; computed once here, branched on inside the inner loop
+  %tobool13 = icmp ne i32 %k, 0  ; likewise computed once before the loops
+  br label %for.body
+
+for.body:  ; outer loop header
+  %storemerge82 = phi i64 [ 0, %entry ], [ %inc25, %for.inc24 ]  ; outer counter, starts at 0
+  br label %for.body2
+
+for.body2:  ; inner loop header
+  %storemerge = phi i64 [ 4, %for.body ], [ %dec, %for.inc21 ]  ; inner counter, starts at 4 and counts down
+  br i1 %tobool9, label %for.body2.split, label %for.body2.split2
+
+for.body2.split2:
+  br i1 %tobool13, label %for.inc21, label %for.inc21.if
+
+for.body2.split:
+  br i1 %tobool13, label %for.inc21, label %for.inc21.then
+
+for.inc21.if:
+  %storemerge.1 = phi i64 [ 0, %for.body2.split2 ]  ; single-entry phi with a constant incoming value
+  br label %for.inc21
+
+for.inc21.then:
+  %storemerge.2 = phi i64 [ 0, %for.body2.split ]  ; single-entry phi with a constant incoming value
+  %storemerge.3 = phi i32 [ 0, %for.body2.split ]
+  br label %for.inc21
+
+for.inc21:  ; inner loop latch; join point of the four paths above
+  %storemerge.4 = phi i64 [ %storemerge.1, %for.inc21.if ], [ %storemerge.2, %for.inc21.then ], [ 4, %for.body2.split2 ], [ 4, %for.body2.split ]
+  %storemerge.5 = phi i32 [ 0, %for.inc21.if ], [ %storemerge.3, %for.inc21.then ], [ 0, %for.body2.split2 ], [ 0, %for.body2.split ]
+  %dec = add nsw i64 %storemerge, -1
+  %tobool = icmp eq i64 %dec, 0
+  br i1 %tobool, label %for.inc24, label %for.body2  ; leave the inner loop once %dec reaches 0
+
+for.inc24:  ; outer loop latch
+  %storemerge.4.lcssa = phi i64 [ %storemerge.4, %for.inc21 ]
+  %storemerge.5.lcssa = phi i32 [ %storemerge.5, %for.inc21 ]
+  %inc25 = add nuw nsw i64 %storemerge82, 1
+  %exitcond = icmp ne i64 %inc25, 5
+  br i1 %exitcond, label %for.body, label %for.end26  ; true edge is the back edge: loop while %inc25 != 5
+
+for.end26:  ; exit block holding the exit phis this test is about
+  %dec.lcssa.lcssa = phi i64 [ 0, %for.inc24 ]  ; exit phi with a constant 0 incoming; the only phi here that is used
+  %storemerge.4.lcssa.lcssa = phi i64 [ %storemerge.4.lcssa, %for.inc24 ]
+  %storemerge.5.lcssa.lcssa = phi i32 [ %storemerge.5.lcssa, %for.inc24 ]
+  store i64 %dec.lcssa.lcssa, i64* getelementptr inbounds (%struct.a, %struct.a* @g, i64 0, i32 0), align 8  ; writes the exit-phi value to field 0 of @g
+  ret i16 0
+}
+
+
+!5 = !{!6, !6, i64 0}  ; tag attached to the i32 loads and stores in this file; refers to !6 ("int")
+!6 = !{!"int", !7, i64 0}  ; "int" node, refers to !7
+!7 = !{!"omnipotent char", !8, i64 0}  ; referenced by both !6 ("int") and !10 ("short")
+!8 = !{!"Simple C/C++ TBAA"}  ; referenced by !7
+!9 = !{!10, !10, i64 0}  ; tag attached to the i16 load in @test9; refers to !10 ("short")
+!10 = !{!"short", !7, i64 0}  ; "short" node, refers to !7

Added: llvm/trunk/test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+	%struct.BLEND_MAP = type { i16, i16, i16, i32, %struct.BLEND_MAP_ENTRY* }
+	%struct.BLEND_MAP_ENTRY = type { float, i8, { [5 x float], [4 x i8] } }
+	%struct.TPATTERN = type { i16, i16, i16, i32, float, float, float, %struct.WARP*, %struct.TPATTERN*, %struct.BLEND_MAP*, { %struct.anon, [4 x i8] } }
+	%struct.TURB = type { i16, %struct.WARP*, [3 x double], i32, float, float }
+	%struct.WARP = type { i16, %struct.WARP* }
+	%struct.anon = type { float, [3 x double] }
+
+define void @Parse_Pattern() {		; CFG-only body: three nested loops headed by bb1096.outer20, bb1096.outer23 and bb1096
+entry:
+	br label %bb1096.outer20
+bb671:		; preds = %cond_true1099
+	br label %bb1096.outer23		; back edge of the middle loop
+bb1096.outer20.loopexit:		; preds = %cond_true1099
+	%Local_Turb.0.ph24.lcssa = phi %struct.TURB* [ %Local_Turb.0.ph24, %cond_true1099 ]		; <%struct.TURB*> [#uses=1]
+	br label %bb1096.outer20
+bb1096.outer20:		; preds = %bb1096.outer20.loopexit, %entry
+	%Local_Turb.0.ph22 = phi %struct.TURB* [ undef, %entry ], [ %Local_Turb.0.ph24.lcssa, %bb1096.outer20.loopexit ]		; <%struct.TURB*> [#uses=1]
+	%tmp1098 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br label %bb1096.outer23
+bb1096.outer23:		; preds = %bb1096.outer20, %bb671
+	%Local_Turb.0.ph24 = phi %struct.TURB* [ %Local_Turb.0.ph22, %bb1096.outer20 ], [ null, %bb671 ]		; <%struct.TURB*> [#uses=2]
+	br label %bb1096
+bb1096:		; preds = %cond_true1099, %bb1096.outer23
+	br i1 %tmp1098, label %cond_true1099, label %bb1102		; %tmp1098 is defined in bb1096.outer20, outside the two inner loops
+cond_true1099:		; preds = %bb1096
+	switch i32 0, label %bb1096.outer20.loopexit [		; constant selector 0 matches neither case value
+		 i32 161, label %bb671
+		 i32 359, label %bb1096
+	]
+bb1102:		; preds = %bb1096
+	%Local_Turb.0.ph24.lcssa1 = phi %struct.TURB* [ %Local_Turb.0.ph24, %bb1096 ]		; <%struct.TURB*> [#uses=0] -- single-entry phi in the exit block
+	ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+define void @init_caller_save() {		; has no ret: every block leads back into the cond_true78/bb54 cycle
+entry:
+	br label %cond_true78
+cond_next20:		; preds = %cond_true64
+	br label %bb31
+bb31:		; preds = %cond_true64, %cond_true64, %cond_next20
+	%iftmp.29.1 = phi i32 [ 0, %cond_next20 ], [ 0, %cond_true64 ], [ 0, %cond_true64 ]		; <i32> [#uses=0]
+	br label %bb54
+bb54:		; preds = %cond_true78, %bb31
+	br i1 false, label %bb75, label %cond_true64		; constant-false condition: the %bb75 edge is never taken
+cond_true64:		; preds = %bb54
+	switch i32 %i.0.0, label %cond_next20 [		; %i.0.0 is the phi defined further down in cond_true78
+		 i32 17, label %bb31
+		 i32 18, label %bb31
+	]
+bb75:		; preds = %bb54
+	%tmp74.0 = add i32 %i.0.0, 1		; <i32> [#uses=1]
+	br label %cond_true78
+cond_true78:		; preds = %bb75, %entry
+	%i.0.0 = phi i32 [ 0, %entry ], [ %tmp74.0, %bb75 ]		; <i32> [#uses=2]
+	br label %bb54
+}
+

Added: llvm/trunk/test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; PR1333
+; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-pc-linux-gnu"
+	%struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
+	%struct.ada__tags__dispatch_table = type { [1 x i8*] }
+	%struct.quotes__T173s = type { i8, %struct.quotes__T173s__T174s, [2 x [1 x double]], [2 x i16], i64, i8 }
+	%struct.quotes__T173s__T174s = type { i8, i8, i8, i16, i16, [2 x [1 x double]] }
+
+define void @quotes__write_quote() {		; has no ret: the only non-looping terminators are 'unreachable'
+entry:
+	%tmp606.i = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br label %bb
+bb:		; preds = %cond_next73, %bb, %entry
+	br i1 false, label %bb51, label %bb		; constant-false condition: self-loop edge is the one taken
+bb51:		; preds = %cond_next73, %bb
+	br i1 %tmp606.i, label %quotes__bid_ask_depth_offset_matrices__get_price.exit, label %cond_true.i		; %tmp606.i is computed once in entry
+cond_true.i:		; preds = %bb51
+	unreachable
+quotes__bid_ask_depth_offset_matrices__get_price.exit:		; preds = %bb51
+	br i1 false, label %cond_next73, label %cond_true72
+cond_true72:		; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit
+	unreachable
+cond_next73:		; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit
+	br i1 false, label %bb, label %bb51		; both successors are earlier blocks (two back edges)
+}
+

Added: llvm/trunk/test/Transforms/LoopUnswitch/2007-05-09-tl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2007-05-09-tl.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2007-05-09-tl.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2007-05-09-tl.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,96 @@
+; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR1333
+
+define void @pp_cxx_expression() {		; one self-looping switch block (tailrecurse) fanning out to blocks that only 'ret void'
+entry:
+	%tmp6 = lshr i32 0, 24		; <i32> [#uses=1]
+	br label %tailrecurse
+
+tailrecurse:		; preds = %tailrecurse, %tailrecurse, %entry
+	switch i32 %tmp6, label %bb96 [		; selector %tmp6 is computed in entry, outside the self-loop
+		 i32 24, label %bb10
+		 i32 25, label %bb10
+		 i32 28, label %bb10
+		 i32 29, label %bb48
+		 i32 31, label %bb48
+		 i32 32, label %bb48
+		 i32 33, label %bb48
+		 i32 34, label %bb48
+		 i32 36, label %bb15
+		 i32 51, label %bb89
+		 i32 52, label %bb89
+		 i32 54, label %bb83
+		 i32 57, label %bb59
+		 i32 63, label %bb80
+		 i32 64, label %bb80
+		 i32 68, label %bb80
+		 i32 169, label %bb75
+		 i32 170, label %bb19
+		 i32 171, label %bb63
+		 i32 172, label %bb63
+		 i32 173, label %bb67
+		 i32 174, label %bb67
+		 i32 175, label %bb19
+		 i32 176, label %bb75
+		 i32 178, label %bb59
+		 i32 179, label %bb89
+		 i32 180, label %bb59
+		 i32 182, label %bb48
+		 i32 183, label %bb48
+		 i32 184, label %bb48
+		 i32 185, label %bb48
+		 i32 186, label %bb48
+		 i32 195, label %bb48
+		 i32 196, label %bb59
+		 i32 197, label %bb89
+		 i32 198, label %bb70
+		 i32 199, label %bb59
+		 i32 200, label %bb59
+		 i32 201, label %bb59
+		 i32 202, label %bb59
+		 i32 203, label %bb75
+		 i32 204, label %bb59
+		 i32 205, label %tailrecurse		; back edge to the switch block itself
+		 i32 210, label %tailrecurse		; back edge to the switch block itself
+	]
+
+bb10:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb15:		; preds = %tailrecurse
+	ret void
+
+bb19:		; preds = %tailrecurse, %tailrecurse
+	ret void
+
+bb48:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb59:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb63:		; preds = %tailrecurse, %tailrecurse
+	ret void
+
+bb67:		; preds = %tailrecurse, %tailrecurse
+	ret void
+
+bb70:		; preds = %tailrecurse
+	ret void
+
+bb75:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb80:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb83:		; preds = %tailrecurse
+	ret void
+
+bb89:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb96:		; preds = %tailrecurse
+	ret void		; default destination of the switch
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt < %s -loop-unswitch -instcombine -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -instcombine -disable-output
+
+@str3 = external constant [3 x i8]		; <[3 x i8]*> [#uses=1]
+
+define i32 @stringSearch_Clib(i32 %count) {		; loop headed by bb36.outer with latch bb41; two 'ret i32 0' exits sit inside the loop body
+entry:
+	%ttmp25 = icmp sgt i32 %count, 0		; <i1> [#uses=1]
+	br i1 %ttmp25, label %bb36.preheader, label %bb44
+
+bb36.preheader:		; preds = %entry
+	%ttmp33 = icmp slt i32 0, 250		; <i1> [#uses=1]
+	br label %bb36.outer
+
+bb36.outer:		; preds = %bb41, %bb36.preheader
+	br i1 %ttmp33, label %bb.nph, label %bb41		; %ttmp33 comes from bb36.preheader, i.e. before the loop is entered
+
+bb.nph:		; preds = %bb36.outer
+	%ttmp8 = icmp eq i8* null, null		; <i1> [#uses=1]
+	%ttmp6 = icmp eq i8* null, null		; <i1> [#uses=1]
+	%tmp31 = call i32 @strcspn( i8* null, i8* getelementptr ([3 x i8], [3 x i8]* @str3, i64 0, i64 0) )		; <i32> [#uses=1]
+	br i1 %ttmp8, label %cond_next, label %cond_true
+
+cond_true:		; preds = %bb.nph
+	ret i32 0
+
+cond_next:		; preds = %bb.nph
+	br i1 %ttmp6, label %cond_next28, label %cond_true20
+
+cond_true20:		; preds = %cond_next
+	ret i32 0
+
+cond_next28:		; preds = %cond_next
+	%tmp33 = add i32 %tmp31, 0		; <i32> [#uses=1]
+	br label %bb41
+
+bb41:		; preds = %cond_next28, %bb36.outer
+	%c.2.lcssa = phi i32 [ 0, %bb36.outer ], [ %tmp33, %cond_next28 ]		; <i32> [#uses=1]
+	br i1 false, label %bb36.outer, label %bb44		; constant-false condition: the back edge is never taken
+
+bb44:		; preds = %bb41, %entry
+	%c.01.1 = phi i32 [ 0, %entry ], [ %c.2.lcssa, %bb41 ]		; <i32> [#uses=1]
+	ret i32 %c.01.1
+}
+
+declare i32 @strcspn(i8*, i8*)

Added: llvm/trunk/test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+define i32 @main(i32 %argc, i8** %argv) {		; two-block loop bb <-> cond_true; neither argument is used in the body
+entry:
+	%tmp1785365 = icmp ult i32 0, 100		; <i1> [#uses=1]
+	br label %bb
+
+bb:		; preds = %cond_true, %entry
+	br i1 false, label %cond_true, label %cond_next
+
+cond_true:		; preds = %bb
+	br i1 %tmp1785365, label %bb, label %bb1788		; %tmp1785365 is computed in entry, outside the loop
+
+cond_next:		; preds = %bb
+	%iftmp.1.0 = select i1 false, i32 0, i32 0		; <i32> [#uses=1]
+	br i1 false, label %cond_true47, label %cond_next74
+
+cond_true47:		; preds = %cond_next
+	%tmp53 = urem i32 %iftmp.1.0, 0		; <i32> [#uses=0] -- divisor is the constant 0; result is unused
+	ret i32 0
+
+cond_next74:		; preds = %cond_next
+	ret i32 0
+
+bb1788:		; preds = %cond_true
+	ret i32 0
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR1559
+
+target triple = "i686-pc-linux-gnu"
+	%struct.re_pattern_buffer = type { i8*, i32, i32, i32, i8*, i8*, i32, i8 }
+
+define fastcc i32 @byte_regex_compile(i8* %pattern, i32 %size, i32 %syntax, %struct.re_pattern_buffer* %bufp) {  ; none of the four arguments is used in the body
+entry:
+        br i1 false, label %bb147, label %cond_next123
+
+cond_next123:           ; preds = %entry
+        ret i32 0
+
+bb147:          ; preds = %entry
+        switch i32 0, label %normal_char [
+                 i32 91, label %bb1734
+                 i32 92, label %bb5700
+        ]
+
+bb1734:         ; preds = %bb147
+        br label %bb1855.outer.outer
+
+cond_true1831:          ; preds = %bb1855.outer
+        br i1 %tmp1837, label %cond_next1844, label %cond_true1840  ; %tmp1837 is defined in bb1855.outer (textually below)
+
+cond_true1840:          ; preds = %cond_true1831
+        ret i32 0
+
+cond_next1844:          ; preds = %cond_true1831
+        br i1 false, label %bb1855.outer, label %cond_true1849
+
+cond_true1849:          ; preds = %cond_next1844
+        br label %bb1855.outer.outer
+
+bb1855.outer.outer:             ; preds = %cond_true1849, %bb1734
+        %b.10.ph.ph = phi i8* [ null, %cond_true1849 ], [ null, %bb1734 ]               ; <i8*> [#uses=1]
+        br label %bb1855.outer
+
+bb1855.outer:           ; preds = %bb1855.outer.outer, %cond_next1844
+        %b.10.ph = phi i8* [ null, %cond_next1844 ], [ %b.10.ph.ph, %bb1855.outer.outer ]               ; <i8*> [#uses=1]
+        %tmp1837 = icmp eq i8* null, null               ; <i1> [#uses=2]
+        br i1 false, label %cond_true1831, label %cond_next1915
+
+cond_next1915:          ; preds = %cond_next1961, %bb1855.outer
+        store i8* null, i8** null
+        br i1 %tmp1837, label %cond_next1929, label %cond_true1923  ; %tmp1837 is defined in bb1855.outer, outside the cond_next1915 loop
+
+cond_true1923:          ; preds = %cond_next1915
+        ret i32 0
+
+cond_next1929:          ; preds = %cond_next1915
+        br i1 false, label %cond_next1961, label %cond_next2009
+
+cond_next1961:          ; preds = %cond_next1929
+        %tmp1992 = getelementptr i8, i8* %b.10.ph, i32 0            ; <i8*> [#uses=0]
+        br label %cond_next1915  ; back edge of the cond_next1915 loop
+
+cond_next2009:          ; preds = %cond_next1929
+        ret i32 0
+
+bb5700:         ; preds = %bb147
+        ret i32 0
+
+normal_char:            ; preds = %bb147
+        ret i32 0
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2007-08-01-Dom.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2007-08-01-Dom.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2007-08-01-Dom.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2007-08-01-Dom.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt < %s -licm -loop-unswitch -disable-output 
+; PR 1589
+
+      	%struct.QBasicAtomic = type { i32 }
+
+define void @_ZNK5QDate9addMonthsEi(%struct.QBasicAtomic* sret  %agg.result, %struct.QBasicAtomic* %this, i32 %nmonths) {		; one loop headed by cond_true90 with three latches (bb16, bb53, bb58)
+entry:
+	br label %cond_true90
+
+bb16:		; preds = %cond_true90
+	br i1 false, label %bb93, label %cond_true90
+
+bb45:		; preds = %cond_true90
+	br i1 false, label %bb53, label %bb58
+
+bb53:		; preds = %bb45
+	br i1 false, label %bb93, label %cond_true90
+
+bb58:		; preds = %bb45
+	store i32 0, i32* null, align 4		; the only memory access in the function
+	br i1 false, label %cond_true90, label %bb93
+
+cond_true90:		; preds = %bb58, %bb53, %bb16, %entry
+	%nmonths_addr.016.1 = phi i32 [ %nmonths, %entry ], [ 0, %bb16 ], [ 0, %bb53 ], [ %nmonths_addr.016.1, %bb58 ]		; <i32> [#uses=2] -- carries its own value around the %bb58 back edge
+	%tmp14 = icmp slt i32 %nmonths_addr.016.1, -11		; <i1> [#uses=1]
+	br i1 %tmp14, label %bb16, label %bb45
+
+bb93:		; preds = %bb58, %bb53, %bb16
+	ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt < %s -loop-unswitch -instcombine -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -instcombine -disable-output
+	%struct.ClassDef = type { %struct.QByteArray, %struct.QByteArray, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", i8, i8, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", i32, i32 }
+	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+	%struct.Generator = type { %struct.FILE*, %struct.ClassDef*, %"struct.QList<ArgumentDef>", %struct.QByteArray, %"struct.QList<ArgumentDef>" }
+	%struct.QBasicAtomic = type { i32 }
+	%struct.QByteArray = type { %"struct.QByteArray::Data"* }
+	%"struct.QByteArray::Data" = type { %struct.QBasicAtomic, i32, i32, i8*, [1 x i8] }
+	%"struct.QList<ArgumentDef>" = type { %"struct.QList<ArgumentDef>::._19" }
+	%"struct.QList<ArgumentDef>::._19" = type { %struct.QListData }
+	%struct.QListData = type { %"struct.QListData::Data"* }
+	%"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] }
+	%"struct.QMap<QByteArray,QByteArray>" = type { %"struct.QMap<QByteArray,QByteArray>::._56" }
+	%"struct.QMap<QByteArray,QByteArray>::._56" = type { %struct.QMapData* }
+	%struct.QMapData = type { %struct.QMapData*, [12 x %struct.QMapData*], %struct.QBasicAtomic, i32, i32, i32, i8 }
+	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+@.str9 = external constant [1 x i8]		; <[1 x i8]*> [#uses=1]
+
+declare i32 @strcmp(i8*, i8*)
+
+define i32 @_ZN9Generator6strregEPKc(%struct.Generator* %this, i8* %s) {		; loop headed by bb184 (latch bb166) with exits to bb55 and bb250
+entry:
+	%s_addr.0 = select i1 false, i8* getelementptr ([1 x i8], [1 x i8]* @.str9, i32 0, i32 0), i8* %s		; <i8*> [#uses=2]
+	%tmp122 = icmp eq i8* %s_addr.0, null		; <i1> [#uses=1]
+	br label %bb184
+
+bb55:		; preds = %bb184
+	ret i32 0
+
+bb88:		; preds = %bb184
+	br i1 %tmp122, label %bb154, label %bb128		; %tmp122 is computed in entry, before the loop
+
+bb128:		; preds = %bb88
+	%tmp138 = call i32 @strcmp( i8* null, i8* %s_addr.0 )		; <i32> [#uses=1]
+	%iftmp.37.0.in4 = icmp eq i32 %tmp138, 0		; <i1> [#uses=1]
+	br i1 %iftmp.37.0.in4, label %bb250, label %bb166
+
+bb154:		; preds = %bb88
+	br i1 false, label %bb250, label %bb166
+
+bb166:		; preds = %bb154, %bb128
+	%tmp175 = add i32 %idx.0, 1		; <i32> [#uses=1]
+	%tmp177 = add i32 %tmp175, 0		; <i32> [#uses=1]
+	%tmp181 = add i32 %tmp177, 0		; <i32> [#uses=1]
+	%tmp183 = add i32 %i33.0, 1		; <i32> [#uses=1]
+	br label %bb184
+
+bb184:		; preds = %bb166, %entry
+	%i33.0 = phi i32 [ 0, %entry ], [ %tmp183, %bb166 ]		; <i32> [#uses=2]
+	%idx.0 = phi i32 [ 0, %entry ], [ %tmp181, %bb166 ]		; <i32> [#uses=2]
+	%tmp49 = icmp slt i32 %i33.0, 0		; <i1> [#uses=1]
+	br i1 %tmp49, label %bb88, label %bb55
+
+bb250:		; preds = %bb154, %bb128
+	ret i32 %idx.0		; uses the loop-header phi %idx.0 directly from an exit block (no intervening phi)
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -licm -loop-unroll -disable-output
+
+@resonant = external global i32		; <i32*> [#uses=2]
+
+define void @weightadj() {		; outer loop bb -> bb158 -> bb containing an inner loop g.exit -> bb68 -> g.exit
+entry:
+	br label %bb
+
+bb:		; preds = %bb158, %entry
+	store i32 0, i32* @resonant, align 4		; store in the outer loop header
+	br i1 false, label %g.exit, label %bb158
+
+g.exit:		; preds = %bb68, %bb
+	br i1 false, label %bb68, label %cond_true
+
+cond_true:		; preds = %g.exit
+	store i32 1, i32* @resonant, align 4		; conditional store to the same global inside the inner loop
+	br label %bb68
+
+bb68:		; preds = %cond_true, %g.exit
+	%tmp71 = icmp slt i32 0, 0		; <i1> [#uses=1]
+	br i1 %tmp71, label %g.exit, label %bb158
+
+bb158:		; preds = %bb68, %bb
+	br i1 false, label %bb, label %return
+
+return:		; preds = %bb158
+	ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-unswitch -instcombine -gvn -disable-output
+; RUN: opt < %s -loop-unswitch -instcombine -gvn -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR2372
+target triple = "i386-pc-linux-gnu"
+
+define i32 @func_3(i16 signext  %p_5, i16 signext  %p_6) nounwind  {		; loop bb..bb54 branching on two conditions that are both computed in entry
+entry:
+	%tmp3 = icmp eq i16 %p_5, 0		; <i1> [#uses=1]
+	%tmp1314 = sext i16 %p_6 to i32		; <i32> [#uses=1]
+	%tmp28 = icmp ugt i32 %tmp1314, 3		; <i1> [#uses=1]
+	%bothcond = or i1 %tmp28, false		; <i1> [#uses=1]
+	br label %bb
+bb:		; preds = %bb54, %entry
+	br i1 %tmp3, label %bb54, label %bb5		; first condition from entry (%p_5 == 0)
+bb5:		; preds = %bb
+	br i1 %bothcond, label %bb54, label %bb31		; second condition from entry (derived from %p_6)
+bb31:		; preds = %bb5
+	br label %bb54
+bb54:		; preds = %bb31, %bb5, %bb
+	br i1 false, label %bb64, label %bb
+bb64:		; preds = %bb54
+	%tmp6566 = sext i16 %p_6 to i32		; <i32> [#uses=1]
+	%tmp68 = tail call i32 (...) @func_18( i32 1, i32 %tmp6566, i32 1 ) nounwind 		; <i32> [#uses=0]
+	ret i32 undef
+}
+
+declare i32 @func_18(...)

Added: llvm/trunk/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt < %s -licm -loop-unswitch -disable-output
+@g_56 = external global i16		; <i16*> [#uses=2]
+
+define i32 @func_67(i32 %p_68, i8 signext  %p_69, i8 signext  %p_71) nounwind  {		; outer loop headed by bb contains the bb3 loop and the bb44 self-loop; arguments are unused
+entry:
+	br label %bb
+bb:		; preds = %bb44, %entry
+	br label %bb3
+bb3:		; preds = %bb36, %bb
+	%bothcond = or i1 false, false		; <i1> [#uses=1]
+	br i1 %bothcond, label %bb29, label %bb19
+bb19:		; preds = %bb3
+	br i1 false, label %bb36, label %bb29
+bb29:		; preds = %bb19, %bb3
+	ret i32 0
+bb36:		; preds = %bb19
+	store i16 0, i16* @g_56, align 2		; store inside the bb3 loop
+	br i1 false, label %bb44, label %bb3
+bb44:		; preds = %bb44, %bb36
+	%tmp46 = load i16, i16* @g_56, align 2		; <i16> [#uses=0] -- load of the same global inside the bb44 self-loop
+	br i1 false, label %bb, label %bb44
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-unswitch -stats -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -stats -disable-output 2>&1 | FileCheck %s
+; PR 3170
+
+define i32 @a(i32 %x, i32 %y) nounwind {		; counted loop (bound %y) whose body branches on a test of %x computed before the loop
+; CHECK: 1 loop-unswitch - Number of branches unswitched
+; CHECK-NOT: Number of branches unswitched
+
+entry:
+	%0 = icmp ult i32 0, %y		; <i1> [#uses=1]
+	br i1 %0, label %bb.nph, label %bb4
+
+bb.nph:		; preds = %entry
+	%1 = icmp eq i32 %x, 0		; <i1> [#uses=1]
+	br label %bb
+
+bb:		; preds = %bb.nph, %bb3
+	%i.01 = phi i32 [ %3, %bb3 ], [ 0, %bb.nph ]		; <i32> [#uses=1]
+	br i1 %1, label %bb2, label %bb1		; %1 is defined in bb.nph, outside the loop
+
+bb1:		; preds = %bb
+	%2 = tail call i32 (...) @b() nounwind		; <i32> [#uses=0]
+	br label %bb2
+
+bb2:		; preds = %bb, %bb1
+	%3 = add i32 %i.01, 1		; <i32> [#uses=2]
+	br label %bb3
+
+bb3:		; preds = %bb2
+	%i.0 = phi i32 [ %3, %bb2 ]		; <i32> [#uses=1]
+	%4 = icmp ult i32 %i.0, %y		; <i1> [#uses=1]
+	br i1 %4, label %bb, label %bb3.bb4_crit_edge
+
+bb3.bb4_crit_edge:		; preds = %bb3
+	br label %bb4
+
+bb4:		; preds = %bb3.bb4_crit_edge, %entry
+	ret i32 0
+}
+
+declare i32 @b(...)

Added: llvm/trunk/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -loop-unswitch
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa
+; PR8622
+@g_38 = external global i32, align 4
+
+define void @func_67(i32 %p_68.coerce) nounwind {  ; loop for.body <-> for.cond guarded by constant-true branches
+entry:
+  br i1 true, label %for.end12, label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+  %g_38.promoted = load i32, i32* @g_38
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond, %bb.nph
+  %tobool.i = icmp eq i32 %p_68.coerce, 1
+  %xor4.i = xor i32 %p_68.coerce, 1
+  %call1 = select i1 %tobool.i, i32 0, i32 %xor4.i  ; select on a condition derived only from the argument
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body
+  br i1 true, label %for.cond.for.end12_crit_edge, label %for.body
+
+for.cond.for.end12_crit_edge:                     ; preds = %for.cond
+  store i32 %call1, i32* @g_38  ; %call1 is defined in for.body and used here, after the loop, without a phi
+  br label %for.end12
+
+for.end12:                                        ; preds = %for.cond.for.end12_crit_edge, %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt -loop-unswitch -disable-output < %s
+; PR10031
+
+define i32 @test(i32 %command) {  ; has no ret: the only exit block (sw.bb) ends in 'unreachable'
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %if.then14, %tailrecurse, %entry
+  br i1 undef, label %if.then, label %tailrecurse
+
+if.then:                                          ; preds = %tailrecurse
+  switch i32 %command, label %sw.bb [  ; first switch on the argument; default leaves the loop
+    i32 2, label %land.lhs.true
+    i32 0, label %land.lhs.true
+  ]
+
+land.lhs.true:                                    ; preds = %if.then, %if.then
+  br i1 undef, label %sw.bb, label %if.then14
+
+if.then14:                                        ; preds = %land.lhs.true
+  switch i32 %command, label %tailrecurse [  ; second switch on the same argument; default is the back edge
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %if.then14, %if.then14, %land.lhs.true, %if.then
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,64 @@
+; RUN: opt < %s -sroa -loop-unswitch -disable-output
+; RUN: opt < %s -sroa -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR11016
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.2"
+
+%class.MyContainer.1.3.19.29 = type { [6 x %class.MyMemVarClass.0.2.18.28*] }
+%class.MyMemVarClass.0.2.18.28 = type { i32 }
+
+define void @_ZN11MyContainer1fEi(%class.MyContainer.1.3.19.29* %this, i32 %doit) uwtable ssp align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {  ; counted loop (6 iterations) whose body contains an invoke with a cleanup landing pad
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %inc1 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+  %conv = sext i32 %inc1 to i64
+  %cmp = icmp ult i64 %conv, 6
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tobool = icmp ne i32 %doit, 0  ; depends only on the argument %doit, not on the induction variable
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %idxprom = sext i32 %inc1 to i64
+  %array_ = getelementptr inbounds %class.MyContainer.1.3.19.29, %class.MyContainer.1.3.19.29* %this, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [6 x %class.MyMemVarClass.0.2.18.28*], [6 x %class.MyMemVarClass.0.2.18.28*]* %array_, i32 0, i64 %idxprom
+  %tmp4 = load %class.MyMemVarClass.0.2.18.28*, %class.MyMemVarClass.0.2.18.28** %arrayidx, align 8
+  %isnull = icmp eq %class.MyMemVarClass.0.2.18.28* %tmp4, null
+  br i1 %isnull, label %for.inc, label %delete.notnull
+
+delete.notnull:                                   ; preds = %if.then
+  invoke void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28* %tmp4)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %delete.notnull
+  %0 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8*
+  call void @_ZdlPv(i8* %0) nounwind
+  br label %for.inc
+
+lpad:                                             ; preds = %delete.notnull
+  %1 = landingpad { i8*, i32 }
+          cleanup
+  %2 = extractvalue { i8*, i32 } %1, 0
+  %3 = extractvalue { i8*, i32 } %1, 1
+  %4 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8*
+  call void @_ZdlPv(i8* %4) nounwind  ; same deallocation call as on the normal path (invoke.cont)
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %2, 0
+  %lpad.val7 = insertvalue { i8*, i32 } %lpad.val, i32 %3, 1
+  resume { i8*, i32 } %lpad.val7  ; leaves the loop by resuming unwinding
+
+for.inc:                                          ; preds = %invoke.cont, %if.then, %for.body
+  %inc = add nsw i32 %inc1, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZdlPv(i8*) nounwind

Added: llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,95 @@
+; REQUIRES: asserts
+; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info < %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+
+; STATS: 2 loop-unswitch - Number of switches unswitched
+
+; CHECK:      %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK:      ..split_crit_edge:                                ; preds = %0
+; CHECK-NEXT:   br label %.split
+
+; CHECK:      .split.us:                                        ; preds = %0
+; CHECK-NEXT:   br label %loop_begin.us
+
+; CHECK:      loop_begin.us:                                    ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK-NEXT:   %var_val.us = load i32, i32* %var
+; CHECK-NEXT:   switch i32 1, label %default.us-lcssa.us [
+; CHECK-NEXT:     i32 1, label %inc.us
+
+; CHECK:      inc.us:                                           ; preds = %loop_begin.us
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
+; CHECK:      .split:                                           ; preds = %..split_crit_edge
+; CHECK-NEXT:   %2 = icmp eq i32 %c, 2
+; CHECK-NEXT:   br i1 %2, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK:      .split..split.split_crit_edge:                    ; preds = %.split
+; CHECK-NEXT:   br label %.split.split
+
+; CHECK:      .split.split.us:                                  ; preds = %.split
+; CHECK-NEXT:   br label %loop_begin.us1
+
+; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us5, %.split.split.us
+; CHECK-NEXT:   %var_val.us2 = load i32, i32* %var
+; CHECK-NEXT:   switch i32 2, label %default.us-lcssa.us-lcssa.us [
+; CHECK-NEXT:     i32 1, label %inc.us4
+; CHECK-NEXT:     i32 2, label %dec.us3
+; CHECK-NEXT:   ]
+
+; CHECK:      dec.us3:                                          ; preds = %loop_begin.us1
+; CHECK-NEXT:   call void @decf() [[NOR_NUW]]
+; CHECK-NEXT:   br label %loop_begin.backedge.us5
+
+; CHECK:      .split.split:                                     ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT:   br label %loop_begin
+
+; CHECK:      loop_begin:                                       ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT:   %var_val = load i32, i32* %var
+; CHECK-NEXT:   switch i32 %c, label %default.us-lcssa.us-lcssa [
+; CHECK-NEXT:     i32 1, label %inc
+; CHECK-NEXT:     i32 2, label %dec
+; CHECK-NEXT:   ]
+
+; CHECK:      inc:                                              ; preds = %loop_begin
+; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa, label %inc.split
+
+; CHECK:      dec:                                              ; preds = %loop_begin
+; CHECK-NEXT:   br i1 true, label %us-unreachable6, label %dec.split
+
+define i32 @test(i32* %var) {            ; loop whose body switches on %c, a value computed before the loop
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32, i32* %mem               ; defined outside the loop, so it is invariant inside it
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32, i32* %var
+
+  switch i32 %c, label %default [        ; cases 1 and 2 stay in the loop; the default edge leaves it
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin                   ; back edge
+dec:
+  call void @decf() noreturn nounwind
+  br label %loop_begin                   ; back edge
+default:
+  br label %loop_exit                    ; the only path out of the loop
+loop_exit:
+  ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }

Added: llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,88 @@
+; REQUIRES: asserts
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info < %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+
+; STATS: 1 loop-unswitch - Number of switches unswitched
+
+; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll'
+
+; CHECK:        %1 = icmp eq i32 %c, 1
+; CHECK-NEXT:   br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK:      ..split_crit_edge:                                ; preds = %0
+; CHECK-NEXT:   br label %.split
+
+; CHECK:      .split.us:                                        ; preds = %0
+; CHECK-NEXT:   br label %loop_begin.us
+
+; CHECK:      loop_begin.us:                                    ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK:        switch i32 1, label %second_switch.us [
+; CHECK-NEXT:     i32 1, label %inc.us
+
+; CHECK:      second_switch.us:                                 ; preds = %loop_begin.us
+; CHECK-NEXT:   switch i32 %d, label %default.us [
+; CHECK-NEXT:     i32 1, label %inc.us
+; CHECK-NEXT:   ]
+
+; CHECK:      inc.us:                                           ; preds = %second_switch.us, %loop_begin.us
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
+; CHECK:      .split:                                           ; preds = %..split_crit_edge
+; CHECK-NEXT:   br label %loop_begin
+
+; CHECK:      loop_begin:                                       ; preds = %loop_begin.backedge, %.split
+; CHECK:        switch i32 %c, label %second_switch [
+; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT:   ]
+
+; CHECK:      loop_begin.inc_crit_edge:                         ; preds = %loop_begin
+; CHECK-NEXT:   br i1 true, label %us-unreachable, label %inc
+
+; CHECK:      second_switch:                                    ; preds = %loop_begin
+; CHECK-NEXT:   switch i32 %d, label %default [
+; CHECK-NEXT:     i32 1, label %inc
+; CHECK-NEXT:   ]
+
+; CHECK:      inc:                                              ; preds = %loop_begin.inc_crit_edge, %second_switch
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
+; CHECK-NEXT:   br label %loop_begin.backedge
+
+define i32 @test(i32* %var) {            ; loop containing two switches, on %c and then on %d
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32, i32* %mem               ; both switch operands are loaded before the loop,
+  %d = load i32, i32* %mem               ; so they are invariant inside it
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32, i32* %var
+
+  switch i32 %c, label %second_switch [  ; first switch: case 1 goes to %inc, otherwise try the second switch
+      i32 1, label %inc
+  ]
+
+second_switch:
+  switch i32 %d, label %default [        ; second switch: case 1 goes to %inc, otherwise %default
+      i32 1, label %inc
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin                   ; back edge
+
+default:
+  br label %loop_begin                   ; back edge
+
+loop_exit:                               ; no branch in this function targets this block
+  ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }

Added: llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,142 @@
+; REQUIRES: asserts
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info < %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+
+; STATS: 3 loop-unswitch - Number of switches unswitched
+
+; CHECK:        %1 = icmp eq i32 %c, 1
+; CHECK-NEXT:   br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK:      ..split_crit_edge:                                ; preds = %0
+; CHECK-NEXT:   br label %.split
+
+; CHECK:      .split.us:                                        ; preds = %0
+; CHECK-NEXT:   %2 = icmp eq i32 %d, 1
+; CHECK-NEXT:   br i1 %2, label %.split.us.split.us, label %.split.us..split.us.split_crit_edge
+
+; CHECK:      .split.us..split.us.split_crit_edge:              ; preds = %.split.us
+; CHECK-NEXT:   br label %.split.us.split
+
+; CHECK:      .split.us.split.us:                               ; preds = %.split.us
+; CHECK-NEXT:   br label %loop_begin.us.us
+
+; CHECK:      loop_begin.us.us:                                 ; preds = %loop_begin.backedge.us.us, %.split.us.split.us
+; CHECK-NEXT:   %var_val.us.us = load i32, i32* %var
+; CHECK-NEXT:   switch i32 1, label %second_switch.us.us [
+; CHECK-NEXT:     i32 1, label %inc.us.us
+
+; CHECK:      second_switch.us.us:                              ; preds = %loop_begin.us.us
+; CHECK-NEXT:   switch i32 1, label %default.us.us [
+; CHECK-NEXT:     i32 1, label %inc.us.us
+
+; CHECK:      inc.us.us:                                        ; preds = %second_switch.us.us, %loop_begin.us.us
+; CHECK-NEXT:   call void @incf() [[NOR_NUW:#[0-9]+]]
+; CHECK-NEXT:   br label %loop_begin.backedge.us.us
+
+; CHECK:      .split.us.split:                                  ; preds = %.split.us..split.us.split_crit_edge
+; CHECK-NEXT:   br label %loop_begin.us
+
+; CHECK:      loop_begin.us:                                    ; preds = %loop_begin.backedge.us, %.split.us.split
+; CHECK-NEXT:   %var_val.us = load i32, i32* %var
+; CHECK-NEXT:   switch i32 1, label %second_switch.us [
+; CHECK-NEXT:     i32 1, label %inc.us
+
+; CHECK:      second_switch.us:                                 ; preds = %loop_begin.us
+; CHECK-NEXT:   switch i32 %d, label %default.us [
+; CHECK-NEXT:     i32 1, label %second_switch.us.inc.us_crit_edge
+; CHECK-NEXT:   ]
+
+; CHECK:      second_switch.us.inc.us_crit_edge:                ; preds = %second_switch.us
+; CHECK-NEXT:   br i1 true, label %us-unreachable8, label %inc.us
+
+; CHECK:      inc.us:                                           ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
+; CHECK:      .split:                                           ; preds = %..split_crit_edge
+; CHECK-NEXT:   %3 = icmp eq i32 %d, 1
+; CHECK-NEXT:   br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK:      .split..split.split_crit_edge:                    ; preds = %.split
+; CHECK-NEXT:   br label %.split.split
+
+; CHECK:      .split.split.us:                                  ; preds = %.split
+; CHECK-NEXT:   br label %loop_begin.us1
+
+; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us6, %.split.split.us
+; CHECK-NEXT:   %var_val.us2 = load i32, i32* %var
+; CHECK-NEXT:   switch i32 %c, label %second_switch.us3 [
+; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge.us
+; CHECK-NEXT:   ]
+
+; CHECK:      second_switch.us3:                                ; preds = %loop_begin.us1
+; CHECK-NEXT:   switch i32 1, label %default.us5 [
+; CHECK-NEXT:     i32 1, label %inc.us4
+; CHECK-NEXT:   ]
+
+; CHECK:      inc.us4:                                          ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3
+; CHECK-NEXT:   call void @incf() [[NOR_NUW]]
+; CHECK-NEXT:   br label %loop_begin.backedge.us6
+
+; CHECK:      loop_begin.inc_crit_edge.us:                      ; preds = %loop_begin.us1
+; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us4
+
+; CHECK:      .split.split:                                     ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT:   br label %loop_begin
+
+; CHECK:      loop_begin:                                       ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT:   %var_val = load i32, i32* %var
+; CHECK-NEXT:   switch i32 %c, label %second_switch [
+; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT:   ]
+
+; CHECK:      loop_begin.inc_crit_edge:                         ; preds = %loop_begin
+; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa, label %inc
+
+; CHECK:      second_switch:                                    ; preds = %loop_begin
+; CHECK-NEXT:   switch i32 %d, label %default [
+; CHECK-NEXT:     i32 1, label %second_switch.inc_crit_edge
+; CHECK-NEXT:   ]
+
+; CHECK:      second_switch.inc_crit_edge:                      ; preds = %second_switch
+; CHECK-NEXT:   br i1 true, label %us-unreachable7, label %inc
+
+
+define i32 @test(i32* %var) {            ; loop with two chained switches whose operands are set before the loop
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32, i32* %mem               ; operand of the first switch
+  %d = load i32, i32* %mem               ; operand of the second switch
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32, i32* %var
+
+  switch i32 %c, label %second_switch [  ; %c == 1 jumps straight to %inc
+      i32 1, label %inc
+  ]
+
+second_switch:
+  switch i32 %d, label %default [        ; reached only through the first switch's default edge
+      i32 1, label %inc
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin                   ; back edge
+
+default:
+  br label %loop_begin                   ; back edge
+
+loop_exit:                               ; has no predecessors: every path above returns to %loop_begin
+  ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
+
+; CHECK: attributes #0 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }

Added: llvm/trunk/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt < %s -S -loop-unswitch -verify-loop-info -verify-dom-info | FileCheck %s
+; RUN: opt < %s -S -loop-unswitch -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s
+; PR12343: -loop-unswitch crash on indirect branch
+
+; CHECK:       %0 = icmp eq i64 undef, 0
+; CHECK-NEXT:  br i1 %0, label %"5", label %"4"
+
+; CHECK:       "5":                                              ; preds = %entry
+; CHECK-NEXT:  br label %"16"
+
+; CHECK:       "16":                                             ; preds = %"22", %"5"
+; CHECK-NEXT:  indirectbr i8* undef, [label %"22", label %"33"]
+
+; CHECK:       "22":                                             ; preds = %"16"
+; CHECK-NEXT:  br i1 %0, label %"16", label %"26"
+
+; CHECK:       "26":                                             ; preds = %"22"
+; CHECK-NEXT:  unreachable
+
+define void @foo() {                     ; loop "16" -> "22" -> "16" whose header ends in an indirectbr
+entry:
+  %0 = icmp eq i64 undef, 0              ; computed once here and reused as the condition in block "22"
+  br i1 %0, label %"5", label %"4"
+
+"4":                                              ; preds = %entry
+  unreachable
+
+"5":                                              ; preds = %entry
+  br label %"16"
+
+"16":                                             ; preds = %"22", %"5"
+  indirectbr i8* undef, [label %"22", label %"33"]
+
+"22":                                             ; preds = %"16"
+  br i1 %0, label %"16", label %"26"     ; back edge to "16" when %0 is true
+
+"26":                                             ; preds = %"22"
+  unreachable
+
+"33":                                             ; preds = %"16"
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,97 @@
+; RUN: opt < %s -basicaa -instcombine -inline -functionattrs -licm -loop-unswitch -gvn -verify
+; PR12573
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379 = type { %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 }
+%class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376 = type { %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* }
+%class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 = type { %class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377* }
+%class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377 = type { i8 }
+
+define void @_Z23get_reconstruction_pathv() uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:                                   ; two nested loops in which every call is an invoke unwinding to %lpad
+  %c = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, align 8
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.end, %entry
+  invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %for.cond
+  invoke void @_ZN1C3endEv()
+          to label %for.cond3 unwind label %lpad
+
+for.cond3:                                        ; preds = %invoke.cont6, %invoke.cont
+  invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c)
+          to label %invoke.cont4 unwind label %lpad
+
+invoke.cont4:                                     ; preds = %for.cond3
+  invoke void @_ZN1C3endEv()
+          to label %invoke.cont6 unwind label %lpad
+
+invoke.cont6:                                     ; preds = %invoke.cont4
+  br i1 undef, label %for.cond3, label %for.end   ; inner-loop back edge, or fall out to %for.end
+
+lpad:                                             ; preds = %for.end, %invoke.cont4, %for.cond3, %invoke.cont, %for.cond
+  %0 = landingpad { i8*, i32 }
+          cleanup
+  resume { i8*, i32 } undef              ; resumes with undef; %0 itself is not used
+
+for.end:                                          ; preds = %invoke.cont6
+  invoke void @_ZN1C13_M_insert_auxER1D()
+          to label %for.cond unwind label %lpad   ; normal edge is the outer-loop back edge
+}
+
+define void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this) uwtable ssp align 2 {
+entry:                                   ; loads field 0 of %this and calls the noreturn helper when that pointer is null
+  %this.addr = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, align 8
+  store %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr, align 8
+  %this1 = load %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr
+  %px = getelementptr inbounds %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this1, i32 0, i32 0
+  %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8
+  %tobool = icmp ne %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376* %0, null
+  br i1 %tobool, label %cond.end, label %cond.false   ; non-null pointer returns normally
+
+cond.false:                                       ; preds = %entry
+  call void @_Z10__assert13v() noreturn
+  unreachable
+
+cond.end:                                         ; preds = %entry
+  ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZN1C3endEv()
+
+define void @_ZN1C13_M_insert_auxER1D() uwtable ssp align 2 {
+entry:
+  ret void                               ; empty body; invoked from %for.end in @_Z23get_reconstruction_pathv
+}
+
+define void @_ZN1DD1Ev() unnamed_addr uwtable inlinehint ssp align 2 {
+entry:
+  ret void                               ; empty body; no other function in this test file references it
+}
+
+define void @_ZN1DD2Ev() unnamed_addr uwtable inlinehint ssp align 2 {
+entry:
+  ret void                               ; empty body; no other function in this test file references it
+}
+
+define void @_ZN1BD1Ev() unnamed_addr uwtable ssp align 2 {
+entry:
+  ret void                               ; empty body; no other function in this test file references it
+}
+
+define void @_ZN1BD2Ev() unnamed_addr uwtable ssp align 2 {
+entry:
+  ret void                               ; empty body; no other function in this test file references it
+}
+
+define void @_ZN1BaSERS_() uwtable ssp align 2 {
+entry:
+  unreachable                            ; body is a lone unreachable; no other function in this test file references it
+}
+
+declare void @_Z10__assert13v() noreturn

Added: llvm/trunk/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR12887
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = common global i32 0, align 4       ; read by @func in its entry block
+@c = common global i32 0, align 4
+@b = common global i32 0, align 4       ; read by @func in its entry block
+
+define void @func() noreturn nounwind uwtable {
+entry:                                   ; everything computed here is invariant in the loop below
+  %0 = load i32, i32* @a, align 4
+  %tobool = icmp eq i32 %0, 0
+  %1 = load i32, i32* @b, align 4
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %d.0 = phi i8 [ undef, %entry ], [ %conv2, %while.body ]
+  %conv = sext i8 %d.0 to i32
+  %cond = select i1 %tobool, i32 0, i32 %conv     ; select on the entry-block compare; %cond has no users
+  %conv11 = zext i8 %d.0 to i32
+  %add = add i32 %1, %conv11
+  %conv2 = trunc i32 %add to i8                   ; feeds the phi on the next iteration
+  br label %while.body                            ; unconditional back edge: the loop has no exit
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,77 @@
+;RUN: opt  -loop-unswitch -simplifycfg -S < %s | FileCheck %s
+
+define i32 @foo(i32 %a, i32 %b) {
+;CHECK-LABEL: foo
+entry:
+  br label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %cmp0 = icmp sgt i32 %b, 0             ; the loop is skipped entirely when %b <= 0
+  br i1 %cmp0, label %for.body, label %for.cond.cleanup
+
+for.body:                                         ; preds = %for.inc, %for.body.lr.ph
+  %inc.i = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %mul.i = phi i32 [ 3, %for.body.lr.ph ], [ %mul.p, %for.inc ]
+  %add.i = phi i32 [ %a, %for.body.lr.ph ], [ %add.p, %for.inc ]
+  %cmp1 = icmp eq i32 %a, 12345          ; operands are an argument and a constant, so this is loop-invariant
+  br i1 %cmp1, label %if.then, label %if.else, !prof !0   ; !0 is the branch_weights node at the end of the file
+; CHECK: %cmp1 = icmp eq i32 %a, 12345
+; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0
+if.then:                                          ; preds = %for.body
+; CHECK: for.body.us:
+; CHECK: add nsw i32 %{{.*}}, 123
+; CHECK: %exitcond.us = icmp eq i32 %inc.us, %b
+; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %for.body.us
+  %add = add nsw i32 %add.i, 123
+  br label %for.inc
+
+if.else:                                          ; preds = %for.body
+  %mul = mul nsw i32 %mul.i, %b
+  br label %for.inc
+; CHECK: for.body:
+; CHECK: %mul = mul nsw i32 %mul.i, %b
+; CHECK: %inc = add nuw nsw i32 %inc.i, 1
+; CHECK: %exitcond = icmp eq i32 %inc, %b
+; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %for.body
+for.inc:                                          ; preds = %if.then, %if.else
+  %mul.p = phi i32 [ %b, %if.then ], [ %mul, %if.else ]
+  %add.p = phi i32 [ %add, %if.then ], [ %a, %if.else ]
+  %inc = add nuw nsw i32 %inc.i, 1
+  %exitcond = icmp eq i32 %inc, %b       ; leave the loop once the counter reaches %b
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc, %for.body.lr.ph
+  %t2 = phi i32 [ %b, %for.body.lr.ph ], [ %mul.p, %for.inc ]
+  %t1 = phi i32 [ %a, %for.body.lr.ph ], [ %add.p, %for.inc ]
+  %add3 = add nsw i32 %t2, %t1
+  ret i32 %add3
+}
+
+define void @foo_swapped(i32 %a, i32 %b) {
+;CHECK-LABEL: foo_swapped
+entry:
+  br label %for.body
+;CHECK: entry:
+;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2
+;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1
+;CHECK: for.body:
+for.body:                                         ; preds = %if.then, %entry
+  %inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ]
+  %add.i = phi i32 [ 100, %entry ], [ %add, %if.then ]
+  %inc = add nuw nsw i32 %inc.i, 1
+  %cmp1 = icmp eq i32 1, 2               ; compares two constants, so it does not depend on the loop
+  br i1 %cmp1, label %if.then, label  %for.cond.cleanup, !prof !0   ; true edge stays in the loop, false edge exits
+
+if.then:                                          ; preds = %for.body
+  %add = add nsw i32 %a, %add.i
+
+  %exitcond = icmp eq i32 %inc, %b
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %if.then, %for.body
+  ret void
+}
+!0 = !{!"branch_weights", i32 64, i32 4}
+
+;CHECK: !0 = !{!"branch_weights", i32 64, i32 4}
+;CHECK: !1 = !{!"branch_weights", i32 4, i32 64}

Added: llvm/trunk/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -loop-unswitch -S | FileCheck %s
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
+
+; In cases where two address spaces do not have the same size pointer, the
+; input for the addrspacecast should not be used as a substitute for itself
+; when manipulating the pointer.
+
+target datalayout = "e-m:e-p:16:16-p1:32:16-i32:16-i64:16-n8:16"
+
+define void @foo() {
+; CHECK-LABEL: @foo
+entry:
+  %arrayidx.i1 = getelementptr inbounds i16, i16* undef, i16 undef
+  %arrayidx.i = addrspacecast i16* %arrayidx.i1 to i16 addrspace(1)*   ; cast into addrspace(1), which the datalayout gives 32-bit pointers
+  br i1 undef, label %for.body.i, label %bar.exit
+
+for.body.i:                                       ; preds = %for.body.i, %entry
+; When we call makeLoopInvariant (i.e. trivial LICM) on this load, it
+; will try to find the base object to prove dereferenceability.  If we look
+; through the addrspacecast, we'll fail an assertion about bitwidths matching.
+; CHECK-LABEL: for.body.i
+; CHECK:   %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+  %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+  %cmp1.i = icmp eq i16 %0, 0
+  br i1 %cmp1.i, label %bar.exit, label %for.body.i
+
+bar.exit:                                         ; preds = %for.body.i, %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,85 @@
+; RUN: opt -mtriple=amdgcn-- -O3 -S %s | FileCheck %s
+
+; Check that loop unswitch happened and condition hoisted out of the loop.
+; Condition is uniform so all targets should perform unswitching.
+
+; CHECK-LABEL: {{^}}define amdgpu_kernel void @uniform_unswitch
+; CHECK: entry:
+; CHECK-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp
+; CHECK-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456
+; CHECK-NEXT: and i1 [[LOOP_COND]], [[IF_COND]]
+; CHECK-NEXT: br i1
+
+define amdgpu_kernel void @uniform_unswitch(i32 * nocapture %out, i32 %n, i32 %x) {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0             ; skip the loop entirely when %n <= 0
+  br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:                                   ; preds = %entry
+  %cmp1 = icmp eq i32 %x, 123456         ; compares the kernel argument %x; computed once before the loop
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.inc
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.inc, %for.body.lr.ph
+  %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  br i1 %cmp1, label %if.then, label %for.inc     ; in-loop branch on the compare made before the loop
+
+if.then:                                          ; preds = %for.body
+  %arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.07
+  store i32 %i.07, i32 * %arrayidx, align 4       ; out[i] = i
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %inc = add nuw nsw i32 %i.07, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; Check that loop unswitch does not happen if condition is divergent.
+
+; CHECK-LABEL: {{^}}define amdgpu_kernel void @divergent_unswitch
+; CHECK: entry:
+; CHECK: icmp
+; CHECK: [[IF_COND:%[a-z0-9]+]] = icmp {{.*}} 567890
+; CHECK: br label
+; CHECK: br i1 [[IF_COND]]
+
+define amdgpu_kernel void @divergent_unswitch(i32 * nocapture %out, i32 %n) {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0             ; skip the loop entirely when %n <= 0
+  br i1 %cmp9, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:                                   ; preds = %entry
+  %call = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+  %cmp2 = icmp eq i32 %call, 567890      ; condition derived from the work-item id: the divergent case this test targets
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.inc
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.inc, %for.body.lr.ph
+  %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  br i1 %cmp2, label %if.then, label %for.inc     ; in-loop branch on the compare made before the loop
+
+if.then:                                          ; preds = %for.body
+  %arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.010
+  store i32 %i.010, i32 * %arrayidx, align 4      ; out[i] = i
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %inc = add nuw nsw i32 %i.010, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone }

Added: llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/AMDGPU/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True

Added: llvm/trunk/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-threshold=0 -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-unswitch -loop-unswitch-threshold=0 -enable-mssa-loop-dependency=true -verify-memoryssa -S 2>&1 | FileCheck %s
+
+; This tests that trivial loop unswitching only happens when the trivial condition
+; itself is an LIV loop condition (not a partial LIV, which can occur in and/or).
+
+define i32 @test(i1 %cond1, i32 %var1) {
+entry:
+  br label %loop_begin
+
+loop_begin:
+  %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+  %cond2 = icmp eq i32 %var3, 10
+  %cond.and = and i1 %cond1, %cond2
+  
+; %cond.and only has %cond1 as LIV so no unswitch should happen.
+; CHECK: br i1 %cond.and, label %do_something, label %loop_exit
+  br i1 %cond.and, label %do_something, label %loop_exit 
+
+do_something:
+  %var2 = add i32 %var3, 1
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @some_func() noreturn 

Added: llvm/trunk/test/Transforms/LoopUnswitch/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/basictest.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/basictest.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/basictest.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,319 @@
+; RUN: opt < %s -loop-unswitch -verify-loop-info -S < %s 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-unswitch -verify-loop-info -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s 2>&1 | FileCheck %s
+
+define i32 @test(i32* %A, i1 %C) {
+entry:
+	br label %no_exit
+no_exit:		; preds = %no_exit.backedge, %entry
+	%i.0.0 = phi i32 [ 0, %entry ], [ %i.0.0.be, %no_exit.backedge ]		; <i32> [#uses=3]
+	%gep.upgrd.1 = zext i32 %i.0.0 to i64		; <i64> [#uses=1]
+	%tmp.7 = getelementptr i32, i32* %A, i64 %gep.upgrd.1		; <i32*> [#uses=4]
+	%tmp.13 = load i32, i32* %tmp.7		; <i32> [#uses=2]
+	%tmp.14 = add i32 %tmp.13, 1		; <i32> [#uses=1]
+	store i32 %tmp.14, i32* %tmp.7
+	br i1 %C, label %then, label %endif
+then:		; preds = %no_exit
+	%tmp.29 = load i32, i32* %tmp.7		; <i32> [#uses=1]
+	%tmp.30 = add i32 %tmp.29, 2		; <i32> [#uses=1]
+	store i32 %tmp.30, i32* %tmp.7
+	%inc9 = add i32 %i.0.0, 1		; <i32> [#uses=2]
+	%tmp.112 = icmp ult i32 %inc9, 100000		; <i1> [#uses=1]
+	br i1 %tmp.112, label %no_exit.backedge, label %return
+no_exit.backedge:		; preds = %endif, %then
+	%i.0.0.be = phi i32 [ %inc9, %then ], [ %inc, %endif ]		; <i32> [#uses=1]
+	br label %no_exit
+endif:		; preds = %no_exit
+	%inc = add i32 %i.0.0, 1		; <i32> [#uses=2]
+	%tmp.1 = icmp ult i32 %inc, 100000		; <i1> [#uses=1]
+	br i1 %tmp.1, label %no_exit.backedge, label %return
+return:		; preds = %endif, %then
+	ret i32 %tmp.13
+}
+
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the noduplicate call.
+
+; CHECK-LABEL: @test2(
+define i32 @test2(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32, i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32, i32* %var
+
+  switch i32 %c, label %default [
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+dec:
+; CHECK: call void @decf()
+; CHECK-NOT: call void @decf()
+  call void @decf() noreturn nounwind noduplicate
+  br label %loop_begin
+default:
+  br label %loop_exit
+loop_exit:
+  ret i32 0
+; CHECK: }
+}
+
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the convergent call that is not control-dependent on the unswitch condition.
+
+; CHECK-LABEL: @test3(
+define i32 @test3(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32, i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32, i32* %var
+
+; CHECK: call void @conv()
+; CHECK-NOT: call void @conv()
+  call void @conv() convergent
+
+  switch i32 %c, label %default [
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+dec:
+  call void @decf() noreturn nounwind
+  br label %loop_begin
+default:
+  br label %loop_exit
+loop_exit:
+  ret i32 0
+; CHECK: }
+}
+
+; Make sure we unswitch %a == 0 out of the loop.
+;
+; CHECK: define void @and_i2_as_switch_input(i2
+; CHECK: entry:
+; This is an indication that the loop has been unswitched.
+; CHECK: icmp eq i2 %a, 0
+; CHECK: br
+; There should be no more unswitching after the 1st unswitch.
+; CHECK-NOT: icmp eq
+; CHECK: ret
+define void @and_i2_as_switch_input(i2 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i2 [ 0, %entry ], [ %inc, %for.inc ]
+  %and = and i2 %a, %i
+  %and1 = and i2 %and, %i
+  switch i2 %and1, label %sw.default [
+    i2 0, label %sw.bb
+    i2 1, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i2 %i, 1
+  %cmp = icmp slt i2 %inc, 3 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Make sure we unswitch %a == !0 out of the loop.
+;
+; CHECK: define void @or_i2_as_switch_input(i2
+; CHECK: entry:
+; This is an indication that the loop has been unswitched.
+; CHECK: icmp eq i2 %a, -1
+; CHECK: br
+; There should be no more unswitching after the 1st unswitch.
+; CHECK-NOT: icmp eq
+; CHECK: ret
+define void @or_i2_as_switch_input(i2 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i2 [ 0, %entry ], [ %inc, %for.inc ]
+  %or = or i2 %a, %i
+  %or1 = or i2 %or, %i
+  switch i2 %or1, label %sw.default [
+    i2 2, label %sw.bb
+    i2 3, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i2 %i, 1
+  %cmp = icmp slt i2 %inc, 3 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Make sure we unswitch %a == !0 out of the loop, even though we do not
+; have it as a case value. Unswitching it out allows us to simplify
+; the or operator chain.
+;
+; CHECK: define void @or_i2_as_switch_input_unswitch_default(i2
+; CHECK: entry:
+; This is an indication that the loop has been unswitched.
+; CHECK: icmp eq i2 %a, -1
+; CHECK: br
+; There should be no more unswitching after the 1st unswitch.
+; CHECK-NOT: icmp eq
+; CHECK: ret
+define void @or_i2_as_switch_input_unswitch_default(i2 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i2 [ 0, %entry ], [ %inc, %for.inc ]
+  %or = or i2 %a, %i
+  %or1 = or i2 %or, %i
+  switch i2 %or1, label %sw.default [
+    i2 1, label %sw.bb
+    i2 2, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i2 %i, 1
+  %cmp = icmp slt i2 %inc, 3 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Make sure we don't unswitch, as we can not find an input value %a
+; that will effectively unswitch 0 or 3 out of the loop.
+;
+; CHECK: define void @and_or_i2_as_switch_input(i2
+; CHECK: entry:
+; This is an indication that the loop has NOT been unswitched.
+; CHECK-NOT: icmp
+; CHECK: br
+define void @and_or_i2_as_switch_input(i2 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i2 [ 0, %entry ], [ %inc, %for.inc ]
+  %and = and i2 %a, %i 
+  %or = or i2 %and, %i
+  switch i2 %or, label %sw.default [
+    i2 0, label %sw.bb
+    i2 3, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i2 %i, 1
+  %cmp = icmp slt i2 %inc, 3 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Make sure we don't unswitch, as we can not find an input value %a
+; that will effectively unswitch true/false out of the loop.
+;
+; CHECK: define void @and_or_i1_as_branch_input(i1
+; CHECK: entry:
+; This is an indication that the loop has NOT been unswitched.
+; CHECK-NOT: icmp
+; CHECK: br
+define void @and_or_i1_as_branch_input(i1 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i1 [ 0, %entry ], [ %inc, %for.inc ]
+  %and = and i1 %a, %i 
+  %or = or i1 %and, %i
+  br i1 %or, label %sw.bb, label %sw.bb1
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i1 %i, 1
+  %cmp = icmp slt i1 %inc, 1 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
+declare void @conv() convergent

Added: llvm/trunk/test/Transforms/LoopUnswitch/cleanuppad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/cleanuppad.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/cleanuppad.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/cleanuppad.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; RUN: opt -S -loop-unswitch < %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+define void @f(i32 %doit, i1 %x, i1 %y) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  %tobool = icmp eq i32 %doit, 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  br i1 %x, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  br i1 %tobool, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  br i1 %y, label %for.inc, label %delete.notnull
+
+delete.notnull:                                   ; preds = %if.then
+  invoke void @g()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %delete.notnull
+  br label %for.inc
+
+lpad:                                             ; preds = %delete.notnull
+  %cp = cleanuppad within none []
+  cleanupret from %cp unwind to caller
+
+for.inc:                                          ; preds = %invoke.cont, %if.then, %for.body
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
+
+; CHECK-LABEL: define void @f(
+; CHECK: cleanuppad within none []
+; CHECK-NOT: cleanuppad
+
+attributes #0 = { ssp uwtable }

Added: llvm/trunk/test/Transforms/LoopUnswitch/copy-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/copy-metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/copy-metadata.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/copy-metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt < %s -loop-unswitch -S < %s 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s 2>&1 | FileCheck %s
+
+; This test checks that the unswitched condition preserves make.implicit metadata.
+
+define i32 @test(i1 %cond) {
+; CHECK-LABEL: @test(
+; CHECK:  br i1 %cond, label %..split_crit_edge, label %.loop_exit.split_crit_edge, !make.implicit !0
+  br label %loop_begin
+
+loop_begin:
+  br i1 %cond, label %continue, label %loop_exit, !make.implicit !0
+
+continue:
+  call void @some_func()
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @some_func()
+
+!0 = !{}

Added: llvm/trunk/test/Transforms/LoopUnswitch/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/crash.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-unswitch -disable-output
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+define void @test1(i32* %S2) {
+entry:
+	br i1 false, label %list_Length.exit, label %cond_true.i
+cond_true.i:		; preds = %entry
+	ret void
+list_Length.exit:		; preds = %entry
+	br i1 false, label %list_Length.exit9, label %cond_true.i5
+cond_true.i5:		; preds = %list_Length.exit
+	ret void
+list_Length.exit9:		; preds = %list_Length.exit
+	br i1 false, label %bb78, label %return
+bb44:		; preds = %bb78, %cond_next68
+	br i1 %tmp49.not, label %bb62, label %bb62.loopexit
+bb62.loopexit:		; preds = %bb44
+	br label %bb62
+bb62:		; preds = %bb62.loopexit, %bb44
+	br i1 false, label %return.loopexit, label %cond_next68
+cond_next68:		; preds = %bb62
+	br i1 false, label %return.loopexit, label %bb44
+bb78:		; preds = %list_Length.exit9
+	%tmp49.not = icmp eq i32* %S2, null		; <i1> [#uses=1]
+	br label %bb44
+return.loopexit:		; preds = %cond_next68, %bb62
+	%retval.0.ph = phi i32 [ 1, %cond_next68 ], [ 0, %bb62 ]		; <i32> [#uses=1]
+	br label %return
+return:		; preds = %return.loopexit, %list_Length.exit9
+	%retval.0 = phi i32 [ 0, %list_Length.exit9 ], [ %retval.0.ph, %return.loopexit ]		; <i32> [#uses=0]
+	ret void
+}
+
+define void @test2() nounwind {
+entry:
+  br label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+  %and.i13521 = and <4 x i1> undef, undef         ; <<4 x i1>> [#uses=1]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %bb.nph
+  %or.i = select <4 x i1> %and.i13521, <4 x i32> undef, <4 x i32> undef ; <<4 x i32>> [#uses=0]
+  br i1 false, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; PR6879
+define i32* @test3(i32** %p_45, i16 zeroext %p_46, i64 %p_47, i64 %p_48, i16 signext %p_49) nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond4, %entry
+  br i1 false, label %for.cond4, label %for.end88
+
+for.cond4:                                        ; preds = %for.cond
+  %conv46 = trunc i32 0 to i8                     ; <i8> [#uses=2]
+  %cmp60 = icmp sgt i8 %conv46, 124               ; <i1> [#uses=1]
+  %or.cond = and i1 undef, %cmp60                 ; <i1> [#uses=1]
+  %cond = select i1 %or.cond, i8 %conv46, i8 undef ; <i8> [#uses=0]
+  br label %for.cond
+
+for.end88:                                        ; preds = %for.cond
+  ret i32* undef
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/elseif-non-exponential-behavior.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/elseif-non-exponential-behavior.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/elseif-non-exponential-behavior.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/elseif-non-exponential-behavior.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,63 @@
+; RUN: opt -loop-unswitch -S - < %s | FileCheck %s
+; RUN: opt -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S - < %s | FileCheck %s
+
+;CHECK-LABEL: @b
+;CHECK: [[Loop1:for\.end.*]]:                              ; preds = %for.cond.us
+;CHECK-NEXT:  %[[PhiVar1:pdt.*]] = phi i32 [ %pdt.0.us, %for.cond.us ]
+;CHECK: [[Loop2:for\.end.*]]:                     ; preds = %for.cond.us1
+;CHECK-NEXT:  %[[PhiVar2:pdt.*]] = phi i32 [ %pdt.0.us2, %for.cond.us1 ]
+;CHECK: [[Loop3:for\.end.*]]:                        ; preds = %for.cond
+;CHECK-NEXT:  %[[PhiVar3:pdt.*]] = phi i32 [ %pdt.0, %for.cond ]
+;CHECK: [[Join1:for\.end.*]]:                                 ; preds = %[[Loop2]], %[[Loop3]]
+;CHECK-NEXT:  %[[PhiRes1:pdt.*]] = phi i32 [ %[[PhiVar3]], %[[Loop3]] ], [ %[[PhiVar2]], %[[Loop2]] ]
+;CHECK: for.end:                                          ; preds = %[[Loop1]], %[[Join1]]
+;CHECK-NEXT:  %[[PhiRes2:pdt.*]] = phi i32 [ %[[PhiRes1]], %[[Join1]] ], [ %[[PhiVar1]], %[[Loop1]] ]
+;CHECK-NEXT:  ret i32 %[[PhiRes2]]
+
+; Function Attrs: nounwind uwtable
+define i32 @b(i32 %x, i32 %y) #0 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %pdt.0 = phi i32 [ 1, %entry ], [ %pdt.2, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tobool = icmp ne i32 %x, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %for.body
+  %mul = mul nsw i32 %pdt.0, 2
+  br label %if.end6
+
+if.else:                                          ; preds = %for.body
+  %tobool1 = icmp ne i32 %y, 0
+  br i1 %tobool1, label %if.then2, label %if.else4
+
+if.then2:                                         ; preds = %if.else
+  %mul3 = mul nsw i32 %pdt.0, 3
+  br label %if.end
+
+if.else4:                                         ; preds = %if.else
+  %mul5 = mul nsw i32 %pdt.0, 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else4, %if.then2
+  %pdt.1 = phi i32 [ %mul3, %if.then2 ], [ %mul5, %if.else4 ]
+  br label %if.end6
+
+if.end6:                                          ; preds = %if.end, %if.then
+  %pdt.2 = phi i32 [ %mul, %if.then ], [ %pdt.1, %if.end ]
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end6
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret i32 %pdt.0
+}
+

Added: llvm/trunk/test/Transforms/LoopUnswitch/exponential-behavior.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/exponential-behavior.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/exponential-behavior.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/exponential-behavior.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; RUN: opt -loop-unswitch -S < %s | FileCheck %s
+; RUN: opt -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+
+define void @f(i32 %n, i32* %ptr) {
+; CHECK-LABEL: @f(
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+  %iv.inc = add i32 %iv, 1
+  %unswitch_cond_root = icmp ne i32 %iv.inc, 42
+  %us.0 = and i1 %unswitch_cond_root, %unswitch_cond_root
+  %us.1 = and i1 %us.0, %us.0
+  %us.2 = and i1 %us.1, %us.1
+  %us.3 = and i1 %us.2, %us.2
+  %us.4 = and i1 %us.3, %us.3
+  %us.5 = and i1 %us.4, %us.4
+  %us.6 = and i1 %us.5, %us.5
+  %us.7 = and i1 %us.6, %us.6
+  %us.8 = and i1 %us.7, %us.7
+  %us.9 = and i1 %us.8, %us.8
+  %us.10 = and i1 %us.9, %us.9
+  %us.11 = and i1 %us.10, %us.10
+  %us.12 = and i1 %us.11, %us.11
+  %us.13 = and i1 %us.12, %us.12
+  %us.14 = and i1 %us.13, %us.13
+  %us.15 = and i1 %us.14, %us.14
+  %us.16 = and i1 %us.15, %us.15
+  %us.17 = and i1 %us.16, %us.16
+  %us.18 = and i1 %us.17, %us.17
+  %us.19 = and i1 %us.18, %us.18
+  %us.20 = and i1 %us.19, %us.19
+  %us.21 = and i1 %us.20, %us.20
+  %us.22 = and i1 %us.21, %us.21
+  %us.23 = and i1 %us.22, %us.22
+  %us.24 = and i1 %us.23, %us.23
+  %us.25 = and i1 %us.24, %us.24
+  %us.26 = and i1 %us.25, %us.25
+  %us.27 = and i1 %us.26, %us.26
+  %us.28 = and i1 %us.27, %us.27
+  %us.29 = and i1 %us.28, %us.28
+  br i1 %us.29, label %leave, label %be
+
+be:
+  store volatile i32 0, i32* %ptr
+  %becond = icmp ult i32 %iv.inc, %n
+  br i1 %becond, label %leave, label %loop
+
+leave:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/guards.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/guards.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/guards.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/guards.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,98 @@
+; RUN: opt -S -loop-unswitch < %s | FileCheck %s
+; RUN: opt -S -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @f_0(i32 %n, i32* %ptr, i1 %c) {
+; CHECK-LABEL: @f_0(
+; CHECK: loop.us:
+; CHECK-NOT: guard
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+  %iv.inc = add i32 %iv, 1
+  call void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+  store volatile i32 0, i32* %ptr
+  %becond = icmp ult i32 %iv.inc, %n
+  br i1 %becond, label %leave, label %loop
+
+leave:
+  ret void
+}
+
+define void @f_1(i32 %n, i32* %ptr, i1 %c_0, i1 %c_1) {
+; CHECK-LABEL: @f_1(
+; CHECK: loop.us.us:
+; CHECK-NOT: guard
+; CHECK: loop.us:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"(i32 2) ]
+; CHECK-NOT: guard
+; CHECK: loop.us1:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"(i32 1) ]
+; CHECK-NOT: guard
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"(i32 1) ]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"(i32 2) ]
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+  %iv.inc = add i32 %iv, 1
+  call void(i1, ...) @llvm.experimental.guard(i1 %c_0) [ "deopt"(i32 1) ]
+  store volatile i32 0, i32* %ptr
+  call void(i1, ...) @llvm.experimental.guard(i1 %c_1) [ "deopt"(i32 2) ]
+  %becond = icmp ult i32 %iv.inc, %n
+  br i1 %becond, label %leave, label %loop
+
+leave:
+  ret void
+}
+
+; Basic negative test
+
+define void @f_3(i32 %n, i32* %ptr, i1* %c_ptr) {
+; CHECK-LABEL: @f_3(
+; CHECK-NOT: loop.us:
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+  %iv.inc = add i32 %iv, 1
+  %c = load volatile i1, i1* %c_ptr
+  call void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+  store volatile i32 0, i32* %ptr
+  %becond = icmp ult i32 %iv.inc, %n
+  br i1 %becond, label %leave, label %loop
+
+leave:
+  ret void
+}
+
+define void @f_4(i32 %n, i32* %ptr, i1 %c) {
+; CHECK-LABEL: @f_4(
+;
+; Demonstrate that unswitching on one guard can cause another guard to
+; be erased (this has implications on what guards we can keep raw
+; pointers to).
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+  %iv.inc = add i32 %iv, 1
+  call void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"(i32 1) ]
+  store volatile i32 0, i32* %ptr
+  %neg = xor i1 %c, 1
+  call void(i1, ...) @llvm.experimental.guard(i1 %neg) [ "deopt"(i32 2) ]
+  %becond = icmp ult i32 %iv.inc, %n
+  br i1 %becond, label %leave, label %loop
+
+leave:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/infinite-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/infinite-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/infinite-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/infinite-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,59 @@
+; REQUIRES: asserts
+; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -loop-unswitch -simplifycfg -S < %s | FileCheck %s
+; PR5373
+
+; Loop unswitching shouldn't trivially unswitch the true case of condition %a
+; in the code here because it leads to an infinite loop. While this doesn't
+; contain any instructions with side effects, it's still a kind of side effect.
+; It can trivially unswitch on the false case of condition %a though.
+
+; STATS: 2 loop-unswitch - Number of branches unswitched
+; STATS: 2 loop-unswitch - Number of unswitches that are trivial
+
+; CHECK-LABEL: @func_16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
+
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split
+
+; CHECK: for.body:
+; CHECK-NEXT: br label %for.body
+
+; CHECK: abort0.split:
+; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
+; CHECK-NEXT: unreachable
+
+; CHECK: abort1.split:
+; CHECK-NEXT: call void @end1() [[NOR_NUW]]
+; CHECK-NEXT: unreachable
+
+; CHECK: }
+
+define void @func_16(i1 %a, i1 %b) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  br i1 %a, label %cond.end, label %abort0
+
+cond.end:
+  br i1 %b, label %for.body, label %abort1
+
+abort0:
+  call void @end0() noreturn nounwind
+  unreachable
+
+abort1:
+  call void @end1() noreturn nounwind
+  unreachable
+}
+
+declare void @end0() noreturn
+declare void @end1() noreturn
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }

Added: llvm/trunk/test/Transforms/LoopUnswitch/invalidate-scev.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/invalidate-scev.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/invalidate-scev.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/invalidate-scev.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt -S -indvars -loop-unswitch < %s | FileCheck %s
+; RUN: opt -S -indvars -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test_01() {
+
+; Make sure we don't hit a SCEV assertion due to incorrect invalidation.
+; CHECK-LABEL: @test_01
+
+entry:
+  br label %loop
+
+loop:                           ; preds = %backedge, %entry
+  %p_50.addr.0 = phi i16 [ undef, %entry ], [ %add2699, %backedge ]
+  %idxprom2690 = sext i16 %p_50.addr.0 to i32
+  %arrayidx2691 = getelementptr inbounds [5 x i32], [5 x i32]* undef, i32 0, i32 %idxprom2690
+  %0 = load i32, i32* %arrayidx2691, align 1
+  %tobool2692 = icmp ne i32 %0, 0
+  br label %inner_loop
+
+inner_loop:                                     ; preds = %inner_backedge, %loop
+  br i1 %tobool2692, label %backedge, label %inner_backedge
+
+inner_backedge:                                       ; preds = %inner_loop
+  br label %inner_loop
+
+backedge:                                      ; preds = %inner_loop
+  %add2699 = add nsw i16 %p_50.addr.0, 1
+  br i1 false, label %loop, label %exit
+
+exit:               ; preds = %backedge
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/msan.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/msan.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/msan.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/msan.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,154 @@
+; RUN: opt < %s -loop-unswitch -verify-loop-info -S < %s 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-unswitch -verify-loop-info -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s 2>&1 | FileCheck %s
+
+@sink = global i32 0, align 4
+@y = global i64 0, align 8
+
+; The following is approximately:
+; void f(bool x, int p, int q) {
+;   volatile bool x2 = x;
+;   for (int i = 0; i < 1; ++i) {
+;     if (x2) {
+;       if (y)
+;         sink = p;
+;       else
+;         sink = q;
+;     }
+;   }
+; }
+; With MemorySanitizer, the loop can not be unswitched on "y", because "y" could
+; be uninitialized when x == false.
+; Test that the branch on "y" is inside the loop (after the first unconditional
+; branch).
+
+define void @may_not_execute(i1 zeroext %x, i32 %p, i32 %q) sanitize_memory {
+; CHECK-LABEL: @may_not_execute(
+entry:
+; CHECK: %[[Y:.*]] = load i64, i64* @y, align 8
+; CHECK: %[[YB:.*]] = icmp eq i64 %[[Y]], 0
+; CHECK-NOT: br i1
+; CHECK: br label
+; CHECK: br i1 %[[YB]]
+
+  %x2 = alloca i8, align 1
+  %frombool1 = zext i1 %x to i8
+  store volatile i8 %frombool1, i8* %x2, align 1
+  %0 = load i64, i64* @y, align 8
+  %tobool3 = icmp eq i64 %0, 0
+  br label %for.body
+
+for.body:
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %x2.0. = load volatile i8, i8* %x2, align 1
+  %tobool2 = icmp eq i8 %x2.0., 0
+  br i1 %tobool2, label %for.inc, label %if.then
+
+if.then:
+  br i1 %tobool3, label %if.else, label %if.then4
+
+if.then4:
+  store volatile i32 %p, i32* @sink, align 4
+  br label %for.inc
+
+if.else:
+  store volatile i32 %q, i32* @sink, align 4
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.01, 1
+  %cmp = icmp slt i32 %inc, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+
+; The same as above, but "y" is a function parameter instead of a global.
+; This shows that it is not enough to suppress hoisting of load instructions,
+; the actual problem is in the speculative branching.
+
+define void @may_not_execute2(i1 zeroext %x, i1 zeroext %y, i32 %p, i32 %q) sanitize_memory {
+; CHECK-LABEL: @may_not_execute2(
+entry:
+; CHECK-NOT: br i1
+; CHECK: br label
+; CHECK: br i1 %y,
+  %x2 = alloca i8, align 1
+  %frombool2 = zext i1 %x to i8
+  store volatile i8 %frombool2, i8* %x2, align 1
+  br label %for.body
+
+for.body:
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %x2.0. = load volatile i8, i8* %x2, align 1
+  %tobool3 = icmp eq i8 %x2.0., 0
+  br i1 %tobool3, label %for.inc, label %if.then
+
+if.then:
+  br i1 %y, label %if.then5, label %if.else
+
+if.then5:
+  store volatile i32 %p, i32* @sink, align 4
+  br label %for.inc
+
+if.else:
+  store volatile i32 %q, i32* @sink, align 4
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.01, 1
+  %cmp = icmp slt i32 %inc, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+
+; The following is approximately:
+; void f(bool x, int p, int q) {
+;   volatile bool x2 = x;
+;   for (int i = 0; i < 1; ++i) {
+;     if (y)
+;       sink = p;
+;     else
+;       sink = q;
+;   }
+; }
+; "if (y)" is guaranteed to execute; the loop can be unswitched.
+
+define void @must_execute(i1 zeroext %x, i32 %p, i32 %q) sanitize_memory {
+; CHECK-LABEL: @must_execute(
+entry:
+; CHECK:       %[[Y:.*]] = load i64, i64* @y, align 8
+; CHECK-NEXT:  %[[YB:.*]] = icmp eq i64 %[[Y]], 0
+; CHECK-NEXT:  br i1 %[[YB]],
+
+  %x2 = alloca i8, align 1
+  %frombool1 = zext i1 %x to i8
+  store volatile i8 %frombool1, i8* %x2, align 1
+  %0 = load i64, i64* @y, align 8
+  %tobool2 = icmp eq i64 %0, 0
+  br label %for.body
+
+for.body:
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  br i1 %tobool2, label %if.else, label %if.then
+
+if.then:
+  store volatile i32 %p, i32* @sink, align 4
+  br label %for.inc
+
+if.else:
+  store volatile i32 %q, i32* @sink, align 4
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.01, 1
+  %cmp = icmp slt i32 %inc, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopUnswitch/pr32818.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/pr32818.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/pr32818.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/pr32818.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; Check that the call doesn't get removed even if
+; it has no uses. It could have side-effects.
+; RUN: opt -loop-unswitch -S %s | FileCheck %s
+; RUN: opt -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S %s | FileCheck %s
+
+; CHECK-LABEL: @tinky
+; The loop at %body always takes its backedge (br i1 true). The select inside
+; it is controlled by %cmp1, which is computed before the loop. The result of
+; the call in %end is unused and the function returns the constant 0.
+define i32 @tinkywinky(i8 %patatino) {
+  %cmp1 = icmp slt i8 %patatino, 5
+  br label %body
+body:
+; Select on a loop-invariant condition; one arm is undef.
+  %i = select i1 %cmp1, i8 6, i8 undef
+  br i1 true, label %body, label %end
+end:
+  %split = phi i8 [ %i, %body ]
+  %conv4 = sext i8 %split to i32
+; CHECK: tail call fastcc i32 @fn5(
+  %call = tail call fastcc i32 @fn5(i32 %conv4)
+  ret i32 0
+}
+declare fastcc i32 @fn5(i32 returned) unnamed_addr

Added: llvm/trunk/test/Transforms/LoopUnswitch/preserve-analyses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/preserve-analyses.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/preserve-analyses.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/preserve-analyses.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,130 @@
+; RUN: opt -loop-unswitch -verify-loop-info -verify-dom-info -disable-output < %s
+; RUN: opt -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -verify-loop-info -verify-dom-info -disable-output < %s
+
+; Loop unswitch should be able to unswitch these loops and
+; preserve LCSSA and LoopSimplify forms.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-apple-darwin9"
+
+; External i32 globals; @ineqn loads each of them once in its entry block.
+@delim1 = external global i32                     ; <i32*> [#uses=1]
+@delim2 = external global i32                     ; <i32*> [#uses=1]
+
+; Two nested loops: the outer loop is headed by %bb8.outer and the inner loop
+; by %bb8. Both %6 (the branch condition in %bb5) and %ineq.0.ph (the switch
+; operand in %bb2) are defined in %bb8.outer, so neither changes while the
+; inner loop runs. The value %3 leaves the inner loop only through the
+; single-entry phis %.lcssa and %.lcssa1.
+define i32 @ineqn(i8* %s, i8* %p) nounwind readonly {
+entry:
+  %0 = load i32, i32* @delim1, align 4                 ; <i32> [#uses=1]
+  %1 = load i32, i32* @delim2, align 4                 ; <i32> [#uses=1]
+  br label %bb8.outer
+
+bb:                                               ; preds = %bb8
+  %2 = icmp eq i8* %p_addr.0, %s                  ; <i1> [#uses=1]
+  br i1 %2, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb
+  %3 = getelementptr inbounds i8, i8* %p_addr.0, i32 1 ; <i8*> [#uses=3]
+; Switch on a value that is invariant in the inner loop.
+  switch i32 %ineq.0.ph, label %bb8.backedge [
+    i32 0, label %bb3
+    i32 1, label %bb6
+  ]
+
+bb8.backedge:                                     ; preds = %bb6, %bb5, %bb2
+  br label %bb8
+
+bb3:                                              ; preds = %bb2
+  %4 = icmp eq i32 %8, %0                         ; <i1> [#uses=1]
+  br i1 %4, label %bb8.outer.loopexit, label %bb5
+
+bb5:                                              ; preds = %bb3
+; Branch on %6, which is also invariant in the inner loop.
+  br i1 %6, label %bb6, label %bb8.backedge
+
+bb6:                                              ; preds = %bb5, %bb2
+  %5 = icmp eq i32 %8, %1                         ; <i1> [#uses=1]
+  br i1 %5, label %bb7, label %bb8.backedge
+
+bb7:                                              ; preds = %bb6
+  %.lcssa1 = phi i8* [ %3, %bb6 ]                 ; <i8*> [#uses=1]
+  br label %bb8.outer.backedge
+
+bb8.outer.backedge:                               ; preds = %bb8.outer.loopexit, %bb7
+  %.lcssa2 = phi i8* [ %.lcssa1, %bb7 ], [ %.lcssa, %bb8.outer.loopexit ] ; <i8*> [#uses=1]
+  %ineq.0.ph.be = phi i32 [ 0, %bb7 ], [ 1, %bb8.outer.loopexit ] ; <i32> [#uses=1]
+  br label %bb8.outer
+
+bb8.outer.loopexit:                               ; preds = %bb3
+  %.lcssa = phi i8* [ %3, %bb3 ]                  ; <i8*> [#uses=1]
+  br label %bb8.outer.backedge
+
+bb8.outer:                                        ; preds = %bb8.outer.backedge, %entry
+  %ineq.0.ph = phi i32 [ 0, %entry ], [ %ineq.0.ph.be, %bb8.outer.backedge ] ; <i32> [#uses=3]
+  %p_addr.0.ph = phi i8* [ %p, %entry ], [ %.lcssa2, %bb8.outer.backedge ] ; <i8*> [#uses=1]
+  %6 = icmp eq i32 %ineq.0.ph, 1                  ; <i1> [#uses=1]
+  br label %bb8
+
+bb8:                                              ; preds = %bb8.outer, %bb8.backedge
+  %p_addr.0 = phi i8* [ %p_addr.0.ph, %bb8.outer ], [ %3, %bb8.backedge ] ; <i8*> [#uses=3]
+  %7 = load i8, i8* %p_addr.0, align 1                ; <i8> [#uses=2]
+  %8 = sext i8 %7 to i32                          ; <i32> [#uses=2]
+  %9 = icmp eq i8 %7, 0                           ; <i1> [#uses=1]
+  br i1 %9, label %bb10, label %bb
+
+bb10:                                             ; preds = %bb8, %bb
+  %.0 = phi i32 [ %ineq.0.ph, %bb ], [ 0, %bb8 ]  ; <i32> [#uses=1]
+  ret i32 %.0
+}
+
+; This is a simplified form of ineqn from above. It triggers some
+; different cases in the loop-unswitch code.
+
+; The switch in %bb2 operates on %x, a phi defined in %bb8.outer; both
+; conditional branches in this function are on undef.
+define void @simplified_ineqn() nounwind readonly {
+entry:
+  br label %bb8.outer
+
+bb8.outer:                                        ; preds = %bb6, %bb2, %entry
+  %x = phi i32 [ 0, %entry ], [ 0, %bb6 ], [ 1, %bb2 ] ; <i32> [#uses=1]
+  br i1 undef, label %return, label %bb2
+
+bb2:                                              ; preds = %bb6, %bb8.outer
+  switch i32 %x, label %bb6 [
+    i32 0, label %bb8.outer
+  ]
+
+bb6:                                              ; preds = %bb2
+  br i1 undef, label %bb8.outer, label %bb2
+
+return:                                             ; preds = %bb8.outer
+  ret void
+}
+
+; This function requires special handling to preserve LCSSA form.
+; PR4934
+
+; Loop headed by %while.cond.i. The switch in %if.then31.i.i is on %conv56,
+; which is computed in %entry. Two switch cases share the target
+; %if.then42.i.i, whose phi takes the in-loop value %call.i25 once per edge.
+define void @pnp_check_irq() nounwind noredzone {
+entry:
+  %conv56 = trunc i64 undef to i32                ; <i32> [#uses=1]
+  br label %while.cond.i
+
+while.cond.i:                                     ; preds = %while.cond.i.backedge, %entry
+  %call.i25 = call i8* @pci_get_device() nounwind noredzone ; <i8*> [#uses=2]
+  br i1 undef, label %if.then65, label %while.body.i
+
+while.body.i:                                     ; preds = %while.cond.i
+  br i1 undef, label %if.then31.i.i, label %while.cond.i.backedge
+
+while.cond.i.backedge:                            ; preds = %if.then31.i.i, %while.body.i
+  br label %while.cond.i
+
+if.then31.i.i:                                    ; preds = %while.body.i
+  switch i32 %conv56, label %while.cond.i.backedge [
+    i32 14, label %if.then42.i.i
+    i32 15, label %if.then42.i.i
+  ]
+
+if.then42.i.i:                                    ; preds = %if.then31.i.i, %if.then31.i.i
+  %call.i25.lcssa48 = phi i8* [ %call.i25, %if.then31.i.i ], [ %call.i25, %if.then31.i.i ] ; <i8*> [#uses=0]
+  unreachable
+
+if.then65:                                        ; preds = %while.cond.i
+  unreachable
+}
+
+declare i8* @pci_get_device() noredzone

Added: llvm/trunk/test/Transforms/LoopUnswitch/simplify-with-nonvalness.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/simplify-with-nonvalness.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/simplify-with-nonvalness.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/simplify-with-nonvalness.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,59 @@
+; RUN: opt < %s -loop-unswitch -verify-loop-info -S < %s 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-unswitch -verify-loop-info -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s 2>&1 | FileCheck %s
+
+; The switch has one explicit case and a default case. After we unswitch, we know
+; that %a is definitely not 0 in one of the unswitched loops; make sure we take
+; advantage of that and simplify the branches in that loop.
+;
+; CHECK: define void @simplify_with_nonvalness(
+
+; This is the loop in which we know %a is definitely 0.
+; CHECK: sw.bb.us:
+; CHECK: br i1 true, label %if.then.us, label %if.end.us
+
+; This is the loop in which we do not know what %a is but we know %a is definitely NOT 0.
+; Make sure we use that information to simplify.
+; The icmp eq i32 %a, 0 in one of the unswitched loops is simplified to false.
+; CHECK: sw.bb.split:
+; CHECK: br i1 false, label %if.then, label %if.end
+
+; Counted loop (%i.0 starts at 0 and runs while %i.0 < 1024) whose body
+; switches on the function argument %a. The only explicit case (0) leads to
+; %sw.bb, which tests %a == 0 again before calling @bar.
+define void @simplify_with_nonvalness(i32 %a) #0 {
+entry:
+  br label %for.cond
+
+for.cond:
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, 1024
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+; %a is a function argument, so the switch operand never changes in the loop.
+  switch i32 %a, label %sw.default [
+    i32 0, label %sw.bb
+  ]
+
+sw.bb:
+; Only reached through the "i32 0" case above, yet compares %a with 0 again.
+  %cmp1 = icmp eq i32 %a, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:
+  call void (...) @bar()
+  br label %if.end
+
+if.end:
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+declare void @bar(...) 

Added: llvm/trunk/test/Transforms/LoopUnswitch/trivial-unswitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/trivial-unswitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/trivial-unswitch.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/trivial-unswitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,92 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-threshold=0 -verify-loop-info -S < %s 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-unswitch -loop-unswitch-threshold=0 -verify-loop-info -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s 2>&1 | FileCheck %s
+
+; This test contains two trivial unswitch conditions in one loop.
+; The LoopUnswitch pass should be able to unswitch the second one
+; after unswitching the first one.
+
+
+; CHECK:  br i1 %cond1, label %..split_crit_edge, label %.loop_exit.split_crit_edge
+
+; CHECK:  ..split_crit_edge:                                ; preds = %0
+; CHECK:    br label %.split
+
+; CHECK:  .split:                                           ; preds = %..split_crit_edge
+; CHECK:    br i1 %cond2, label %.split..split.split_crit_edge, label %.split.loop_exit.split1_crit_edge
+
+; CHECK:  .split..split.split_crit_edge:                    ; preds = %.split
+; CHECK:    br label %.split.split
+
+; CHECK:  .split.split:                                     ; preds = %.split..split.split_crit_edge
+; CHECK:    br label %loop_begin
+
+; CHECK:  loop_begin:                                       ; preds = %do_something, %.split.split
+; CHECK:    br i1 true, label %continue, label %loop_exit
+
+; CHECK:  continue:                                         ; preds = %loop_begin
+; CHECK:    %var_val = load i32, i32* %var
+; CHECK:    br i1 true, label %do_something, label %loop_exit
+
+; Loop with two exits to %loop_exit, each guarded by a branch on a function
+; argument (%cond1 in %loop_begin, %cond2 in %continue). The only other work in
+; the loop is a load from %var and a call to @some_func.
+define i32 @test(i32* %var, i1 %cond1, i1 %cond2) {
+  br label %loop_begin
+
+loop_begin:  
+  br i1 %cond1, label %continue, label %loop_exit	; first trivial condition
+
+continue:
+  %var_val = load i32, i32* %var
+  br i1 %cond2, label %do_something, label %loop_exit	; second trivial condition  
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+
+; We will not be able to trivially unswitch on the SwitchInst, as its input
+; is a constant. However, since it's a constant we should be able to figure
+; out that the switch can be folded into an unconditional branch to %continue.
+; Then we unswitch on the br inst in %continue.
+;
+; CHECK: define i32 @test2(
+; This is an indication that the loop has been unswitched on %cond1.
+; CHECK:  br i1 %cond1, label %..split_crit_edge, label %.loop_exit.split_crit_edge
+
+; CHECK:  ..split_crit_edge:                                ; preds = %0
+; CHECK:    br label %.split
+
+; CHECK:  .split:                                           ; preds = %..split_crit_edge
+; CHECK:    br label %loop_begin
+
+; CHECK:  loop_begin:                                       ; preds = %do_something, %.split
+; CHECK:    switch i32
+
+; CHECK:  continue:                                         ; preds = %loop_begin
+; CHECK:    %var_val = load i32, i32* %var
+; CHECK:    br i1 true, label %do_something, label %loop_exit
+
+; Loop whose header switches on the constant 1: both the matching case and the
+; default target %continue, and only the "i32 0" case targets %loop_exit. The
+; branch in %continue is on the function argument %cond1.
+define i32 @test2(i32* %var, i1 %cond1) {
+  br label %loop_begin
+
+loop_begin:  
+  switch i32 1, label %continue [
+    i32 0, label %loop_exit
+    i32 1, label %continue
+  ]
+
+continue:
+  %var_val = load i32, i32* %var
+  br i1 %cond1, label %do_something, label %loop_exit
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @some_func() noreturn

Added: llvm/trunk/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,122 @@
+; REQUIRES: asserts
+; RUN: opt < %s -instcombine -licm -loop-unswitch -loop-unswitch-threshold=1000 -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output -stats 2>&1| FileCheck %s
+; Check no loop unswitch is done because unswitching of equality expr with
+; undef is unsafe before the freeze patch is committed.
+; CHECK-NOT: Number of branches unswitched
+
+; %tmp4 is a phi that is undef when %arg == 0 and a volatile load of @global
+; otherwise. Inside the loop headed by %bb6, %bb12 compares the loaded value
+; %tmp15 for equality against %tmp4, so that branch condition may involve
+; undef.
+define void @ham(i64 %arg) local_unnamed_addr {
+bb:
+  %tmp = icmp eq i64 %arg, 0
+  br i1 %tmp, label %bb3, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = load volatile i64, i64* @global, align 8
+  br label %bb3
+
+bb3:                                              ; preds = %bb1, %bb
+  %tmp4 = phi i64 [ %tmp2, %bb1 ], [ undef, %bb ]
+  %tmp5 = load i64, i64* @global.1, align 8
+  br label %bb6
+
+bb6:                                              ; preds = %bb21, %bb3
+  %tmp7 = phi i64 [ 3, %bb21 ], [ %tmp5, %bb3 ]
+  %tmp8 = phi i64 [ %tmp25, %bb21 ], [ 0, %bb3 ]
+  %tmp9 = icmp eq i64 %tmp7, %arg
+  br i1 %tmp9, label %bb10, label %bb28
+
+bb10:                                             ; preds = %bb6
+  %tmp11 = icmp eq i64 %tmp7, 0
+  br i1 %tmp11, label %bb21, label %bb12
+
+bb12:                                             ; preds = %bb10
+  %tmp13 = load i64, i64* @global.2, align 8
+  %tmp14 = add nsw i64 %tmp13, 1
+  store i64 %tmp14, i64* @global.2, align 8
+  %tmp15 = load i64, i64* @global.3, align 8
+; Equality comparison against the possibly-undef %tmp4.
+  %tmp16 = icmp eq i64 %tmp15, %tmp4
+  br i1 %tmp16, label %bb17, label %bb21
+
+bb17:                                             ; preds = %bb12
+  %tmp18 = phi i64 [ %tmp15, %bb12 ]
+  %tmp19 = load i64, i64* @global.4, align 8
+  %tmp20 = add nsw i64 %tmp19, %tmp18
+  store i64 %tmp20, i64* @global.5, align 8
+  br label %bb29
+
+bb21:                                             ; preds = %bb12, %bb10
+  %tmp22 = load i64, i64* @global.3, align 8
+  %tmp23 = load volatile i64, i64* @global, align 8
+  %tmp24 = add nsw i64 %tmp23, %tmp22
+  store i64 %tmp24, i64* @global.5, align 8
+  store i64 3, i64* @global.1, align 8
+  %tmp25 = add nsw i64 %tmp8, 1
+  %tmp26 = load i64, i64* @global.6, align 8
+  %tmp27 = icmp slt i64 %tmp25, %tmp26
+  br i1 %tmp27, label %bb6, label %bb28
+
+bb28:                                             ; preds = %bb21, %bb6
+  br label %bb29
+
+bb29:                                             ; preds = %bb28, %bb17
+  ret void
+}
+
+; Same loop shape as @ham, but the possibly-undef value is produced by a
+; select: %tmp2 is %arg1 when %arg == 0 and undef otherwise. %bb10 compares the
+; loaded value %tmp13 for equality against %tmp2.
+define void @zot(i64 %arg, i64 %arg1) local_unnamed_addr {
+bb:
+  %tmp = icmp eq i64 %arg, 0
+  %tmp2 = select i1 %tmp, i64 %arg1, i64 undef
+  %tmp3 = load i64, i64* @global.1, align 8
+  br label %bb4
+
+bb4:                                              ; preds = %bb19, %bb
+  %tmp5 = phi i64 [ 3, %bb19 ], [ %tmp3, %bb ]
+  %tmp6 = phi i64 [ %tmp23, %bb19 ], [ 0, %bb ]
+  %tmp7 = icmp eq i64 %tmp5, %arg
+  br i1 %tmp7, label %bb8, label %bb26
+
+bb8:                                              ; preds = %bb4
+  %tmp9 = icmp eq i64 %tmp5, 0
+  br i1 %tmp9, label %bb19, label %bb10
+
+bb10:                                             ; preds = %bb8
+  %tmp11 = load i64, i64* @global.2, align 8
+  %tmp12 = add nsw i64 %tmp11, 1
+  store i64 %tmp12, i64* @global.2, align 8
+  %tmp13 = load i64, i64* @global.3, align 8
+; Equality comparison against the possibly-undef %tmp2.
+  %tmp14 = icmp eq i64 %tmp13, %tmp2
+  br i1 %tmp14, label %bb15, label %bb19
+
+bb15:                                             ; preds = %bb10
+  %tmp16 = phi i64 [ %tmp13, %bb10 ]
+  %tmp17 = load i64, i64* @global.4, align 8
+  %tmp18 = add nsw i64 %tmp17, %tmp16
+  store i64 %tmp18, i64* @global.5, align 8
+  br label %bb27
+
+bb19:                                             ; preds = %bb10, %bb8
+  %tmp20 = load i64, i64* @global.3, align 8
+  %tmp21 = load volatile i64, i64* @global, align 8
+  %tmp22 = add nsw i64 %tmp21, %tmp20
+  store i64 %tmp22, i64* @global.5, align 8
+  store i64 3, i64* @global.1, align 8
+  %tmp23 = add nsw i64 %tmp6, 1
+  %tmp24 = load i64, i64* @global.6, align 8
+  %tmp25 = icmp slt i64 %tmp23, %tmp24
+  br i1 %tmp25, label %bb4, label %bb26
+
+bb26:                                             ; preds = %bb19, %bb4
+  br label %bb27
+
+bb27:                                             ; preds = %bb26, %bb15
+  ret void
+}
+
+; i64 globals read and written by @ham and @zot.
+@global = common global i64 0, align 8
+@global.1 = common global i64 0, align 8
+@global.2 = common global i64 0, align 8
+@global.3 = common global i64 0, align 8
+@global.4 = common global i64 0, align 8
+@global.5 = common global i64 0, align 8
+@global.6 = common global i64 0, align 8
+
+

Added: llvm/trunk/test/Transforms/LoopUnswitch/unswitch-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/unswitch-select.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/unswitch-select.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/unswitch-select.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-unswitch -disable-output -stats 2>&1| FileCheck %s
+; RUN: opt < %s -loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output -stats 2>&1| FileCheck %s
+
+; Check the select statement in the loop will be unswitched.
+; CHECK: 1 loop-unswitch - Number of selects unswitched
+; Counted loop (%i.0 starts at 0 and runs while %i.0 < 10000) that accumulates
+; into %s.0, starting from %a. Each iteration adds either %a or the current
+; index, chosen by a select on the function argument %x. Returns the final sum.
+define i32 @test(i1 zeroext %x, i32 %a) local_unnamed_addr #0 {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %s.0 = phi i32 [ %a, %entry ], [ %add, %while.body ]
+  %cmp = icmp slt i32 %i.0, 10000
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:                                       ; preds = %while.cond
+; The select condition %x is a function argument, hence loop-invariant.
+  %cond = select i1 %x, i32 %a, i32 %i.0
+  %add = add nsw i32 %s.0, %cond
+  %inc = add nsw i32 %i.0, 1
+  br label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  %s.0.lcssa = phi i32 [ %s.0, %while.cond ]
+  ret i32 %s.0.lcssa
+}
+

Added: llvm/trunk/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/12-12-11-if-conv.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/12-12-11-if-conv.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/12-12-11-if-conv.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;CHECK-LABEL: @foo(
+;CHECK: icmp eq <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+; When %x > 0, walks A[i] for i = 0 .. %x-1 and overwrites each element in
+; place: an element equal to 0 becomes 9, any other element becomes
+; (i + 45) + i * %t, truncated to i32. The returned value is undef.
+define i32 @foo(i32 %x, i32 %t, i32* nocapture %A) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i32 %x, 0
+  br i1 %cmp10, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %if.end
+  %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %1 = add nsw i64 %indvars.iv, 45
+  %2 = trunc i64 %indvars.iv to i32
+  %mul = mul nsw i32 %2, %t
+  %3 = trunc i64 %1 to i32
+  %add1 = add nsw i32 %3, %mul
+  br label %if.end
+
+if.end:                                           ; preds = %for.body, %if.then
+; Merge of the conditionally computed value with the constant 9.
+  %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ]
+  store i32 %z.0, i32* %arrayidx, align 4
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %x
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %if.end, %entry
+  ret i32 undef
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/2012-10-20-infloop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/2012-10-20-infloop.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/2012-10-20-infloop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,71 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce
+
+; Check that we don't fall into an infinite loop.
+; Loop with no exit: %for.body branches unconditionally to itself. Its only
+; phi is 1 on entry and 0 on every later iteration.
+define void @test() nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
+ br label %for.body
+}
+
+
+
+; Like @test, plus an i64 induction variable stepping by 1 and an exit branch
+; whose condition is undef. The exit block is unreachable.
+define void @test2() nounwind {
+entry:
+ br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+ %indvars.iv47 = phi i64 [ 0, %entry ], [ %indvars.iv.next48, %for.body ]
+ %0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
+ %indvars.iv.next48 = add i64 %indvars.iv47, 1
+ br i1 undef, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+ unreachable
+}
+
+;PR14701
+; After a chain of branches on undef, the loop headed by %for.body40 runs with
+; %indvars.iv123 starting at 3 until the truncated next value equals 256.
+; %step.0121 is incremented only on iterations that go through %if.then46.
+; The phi %k.1 has an undef incoming value and no uses in this function.
+define void @start_model_rare() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %cond.false, label %cond.true
+
+cond.true:                                        ; preds = %if.end
+  unreachable
+
+cond.false:                                       ; preds = %if.end
+  br i1 undef, label %cond.false28, label %cond.true20
+
+cond.true20:                                      ; preds = %cond.false
+  unreachable
+
+cond.false28:                                     ; preds = %cond.false
+  br label %for.body40
+
+for.body40:                                       ; preds = %for.inc50, %cond.false28
+  %indvars.iv123 = phi i64 [ 3, %cond.false28 ], [ %indvars.iv.next124, %for.inc50 ]
+  %step.0121 = phi i32 [ 1, %cond.false28 ], [ %step.1, %for.inc50 ]
+  br i1 undef, label %if.then46, label %for.inc50
+
+if.then46:                                        ; preds = %for.body40
+  %inc47 = add nsw i32 %step.0121, 1
+  br label %for.inc50
+
+for.inc50:                                        ; preds = %if.then46, %for.body40
+  %k.1 = phi i32 [ undef, %for.body40 ], [ %inc47, %if.then46 ]
+  %step.1 = phi i32 [ %step.0121, %for.body40 ], [ %inc47, %if.then46 ]
+  %indvars.iv.next124 = add i64 %indvars.iv123, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next124 to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %for.end52, label %for.body40
+
+for.end52:                                        ; preds = %for.inc50
+  unreachable
+
+return:                                           ; preds = %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+; RUN: opt < %s  -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4 
+
+; Check that we don't crash.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.6.3 LLVM: 3.2svn\22"
+
+; Array of 32000 floats; @init stores into its second half.
+@b = common global [32000 x float] zeroinitializer, align 16
+
+; When %_n > 0, stores %value as a float at byte offset 4 * iv from %arr, where
+; iv starts at 0 and advances by the sign-extended %stride each iteration. The
+; loop continues while the truncated next iv is < %_n. Always returns 0.
+define i32 @set1ds(i32 %_n, float* nocapture %arr, float %value, i32 %stride) nounwind uwtable {
+entry:
+  %0 = icmp sgt i32 %_n, 0
+  br i1 %0, label %"3.lr.ph", label %"5"
+
+"3.lr.ph":                                        ; preds = %entry
+  %1 = bitcast float* %arr to i8*
+  %2 = sext i32 %stride to i64
+  br label %"3"
+
+"3":                                              ; preds = %"3.lr.ph", %"3"
+  %indvars.iv = phi i64 [ 0, %"3.lr.ph" ], [ %indvars.iv.next, %"3" ]
+; Address is formed by byte arithmetic: shift the index left by 2, then i8 GEP.
+  %3 = shl nsw i64 %indvars.iv, 2
+  %4 = getelementptr inbounds i8, i8* %1, i64 %3
+  %5 = bitcast i8* %4 to float*
+  store float %value, float* %5, align 4
+; The step is the runtime value %2, not a constant.
+  %indvars.iv.next = add i64 %indvars.iv, %2
+  %6 = trunc i64 %indvars.iv.next to i32
+  %7 = icmp slt i32 %6, %_n
+  br i1 %7, label %"3", label %"5"
+
+"5":                                              ; preds = %"3", %entry
+  ret i32 0
+}
+
+; Stores -1.0 into 16000 consecutive floats of @b, starting at element 16000.
+; Each address is a constant-expression base (element 16000 cast to i8*) plus a
+; byte offset of 4 * iv. %name is not used. Always returns 0.
+define i32 @init(i8* nocapture %name) unnamed_addr nounwind uwtable {
+entry:
+  br label %"3"
+
+"3":                                              ; preds = %"3", %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %"3" ]
+  %0 = shl nsw i64 %indvars.iv, 2
+  %1 = getelementptr inbounds i8, i8* bitcast (float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 16000) to i8*), i64 %0
+  %2 = bitcast i8* %1 to float*
+  store float -1.000000e+00, float* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 16000
+  br i1 %exitcond, label %"5", label %"3"
+
+"5":                                              ; preds = %"3"
+  ret i32 0
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/2016-07-27-loop-vec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/2016-07-27-loop-vec.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/2016-07-27-loop-vec.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/2016-07-27-loop-vec.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,19 @@
+; RUN: opt < %s -loop-vectorize -S
+
+; Loop whose exit condition %exitcond is computed in %entry as
+; "icmp eq i64 3, 3", i.e. outside the loop and from constants only. The latch
+; branch carries !llvm.loop metadata (!0). %total1 has no uses.
+define void @foo() local_unnamed_addr {
+entry:
+  %exitcond = icmp eq i64 3, 3
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %total1 = add nsw i64 %i.05, 3
+  %inc = add nuw nsw i64 %i.05, 1
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,79 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -disable-output -debug-only=loop-vectorize 2>&1 | FileCheck %s --check-prefix=COST
+; RUN: opt < %s -loop-vectorize -force-vector-width=2 -instcombine -simplifycfg -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; This test checks that we correctly compute the scalarized operands for a
+; user-specified vectorization factor when interleaving is disabled. We use the
+; "optsize" attribute to disable all interleaving calculations.  A cost of 4
+; for %tmp4 indicates that we would scalarize its operand (%tmp3), giving
+; %tmp4 a lower scalarization overhead.
+;
+; COST-LABEL:  predicated_udiv_scalarized_operand
+; COST:        LV: Found an estimated cost of 4 for VF 2 For instruction: %tmp4 = udiv i64 %tmp2, %tmp3
+;
+; CHECK-LABEL: @predicated_udiv_scalarized_operand(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %entry ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE2:.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %entry ], [ [[TMP17:%.*]], %[[PRED_UDIV_CONTINUE2]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, i64* %a, i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i64> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]]
+; CHECK:       [[PRED_UDIV_IF]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i64 [[TMP4]], %x
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv i64 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> undef, i64 [[TMP7]], i32 0
+; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE]]
+; CHECK:       [[PRED_UDIV_CONTINUE]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi <2 x i64> [ undef, %vector.body ], [ [[TMP8]], %[[PRED_UDIV_IF]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2]]
+; CHECK:       [[PRED_UDIV_IF1]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw i64 [[TMP11]], %x
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT:    [[TMP14:%.*]] = udiv i64 [[TMP13]], [[TMP12]]
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP14]], i32 1
+; CHECK-NEXT:    br label %[[PRED_UDIV_CONTINUE2]]
+; CHECK:       [[PRED_UDIV_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = phi <2 x i64> [ [[TMP9]], %[[PRED_UDIV_CONTINUE]] ], [ [[TMP15]], %[[PRED_UDIV_IF1]] ]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP16]], <2 x i64> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP17]] = add <2 x i64> [[VEC_PHI]], [[PREDPHI]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
+;
+; Sums 100 values derived from a[0..99] and returns the total. For each
+; element v = a[i]: if v > 0 the addend is v udiv (v + %x), otherwise it is v
+; itself. The udiv and the add feeding its divisor execute only under the
+; v > 0 condition.
+define i64 @predicated_udiv_scalarized_operand(i64* %a, i64 %x) optsize {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
+  %r = phi i64 [ 0, %entry ], [ %tmp6, %for.inc ]
+  %tmp0 = getelementptr inbounds i64, i64* %a, i64 %i
+  %tmp2 = load i64, i64* %tmp0, align 4
+  %cond0 = icmp sgt i64 %tmp2, 0
+  br i1 %cond0, label %if.then, label %for.inc
+
+if.then:
+; %tmp3 is used only as the divisor of the conditional udiv below.
+  %tmp3 = add nsw i64 %tmp2, %x
+  %tmp4 = udiv i64 %tmp2, %tmp3
+  br label %for.inc
+
+for.inc:
+  %tmp5 = phi i64 [ %tmp2, %for.body ], [ %tmp4, %if.then]
+  %tmp6 = add i64 %r, %tmp5
+  %i.next = add nuw nsw i64 %i, 1
+  %cond1 = icmp slt i64 %i.next, 100
+  br i1 %cond1, label %for.body, label %for.end
+
+for.end:
+  %tmp7 = phi i64 [ %tmp6, %for.inc ]
+  ret i64 %tmp7
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt < %s -loop-vectorize -mtriple=aarch64-none-linux-gnu -mattr=+neon -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; Function Attrs: nounwind
+define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
+;CHECK-LABEL: array_add
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+entry:
+  %cmp10 = icmp sgt i32 %size, 0 ; the loop is bypassed when %size <= 0
+  br i1 %cmp10, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4 ; a[i]
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4 ; b[i]
+  %add = add nsw i32 %1, %0
+  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx4, align 4 ; c[i] = a[i] + b[i]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the i64 counter so it can be compared with i32 %size
+  %exitcond = icmp eq i32 %lftr.wideiv, %size
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret i32* %c ; the destination pointer is returned as-is
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,147 @@
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 | FileCheck %s
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s --check-prefix=FORCE-VEC
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnueabi"
+
+; Test integer induction variable of step 2:
+;   for (int i = 0; i < 1024; i+=2) {
+;     int tmp = *A++;
+;     sum += i * tmp;
+;   }
+
+; CHECK-LABEL: @ind_plus2(
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: mul nsw <4 x i32>
+; CHECK: mul nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: %index.next = add i64 %index, 8
+; CHECK: icmp eq i64 %index.next, 512
+
+; FORCE-VEC-LABEL: @ind_plus2(
+; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>*
+; FORCE-VEC: mul nsw <2 x i32>
+; FORCE-VEC: add nsw <2 x i32>
+; FORCE-VEC: %index.next = add i64 %index, 2
+; FORCE-VEC: icmp eq i64 %index.next, 512
+define i32 @ind_plus2(i32* %A) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
+  %i = phi i32 [ 0, %entry ], [ %add1, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1 ; A++ (one element per iteration)
+  %0 = load i32, i32* %A.addr, align 4 ; tmp = *A, read through the not-yet-advanced pointer
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum ; sum += i * tmp
+  %add1 = add nsw i32 %i, 2 ; i += 2
+  %cmp = icmp slt i32 %add1, 1024 ; continue while the incremented i < 1024
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+}
+
+
+; Test integer induction variable of step -2:
+;   for (int i = 1024; i > 0; i-=2) {
+;     int tmp = *A++;
+;     sum += i * tmp;
+;   }
+
+; CHECK-LABEL: @ind_minus2(
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: mul nsw <4 x i32>
+; CHECK: mul nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: %index.next = add i64 %index, 8
+; CHECK: icmp eq i64 %index.next, 512
+
+; FORCE-VEC-LABEL: @ind_minus2(
+; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>*
+; FORCE-VEC: mul nsw <2 x i32>
+; FORCE-VEC: add nsw <2 x i32>
+; FORCE-VEC: %index.next = add i64 %index, 2
+; FORCE-VEC: icmp eq i64 %index.next, 512
+define i32 @ind_minus2(i32* %A) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
+  %i = phi i32 [ 1024, %entry ], [ %sub, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1 ; A++ (one element per iteration)
+  %0 = load i32, i32* %A.addr, align 4 ; tmp = *A, read through the not-yet-advanced pointer
+  %mul = mul nsw i32 %0, %i
+  %add = add nsw i32 %mul, %sum ; sum += i * tmp
+  %sub = add nsw i32 %i, -2 ; i -= 2, written as an add of -2
+  %cmp = icmp sgt i32 %i, 2 ; tests the pre-decrement %i; same condition as %sub > 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+}
+
+
+; Test pointer induction variable of step 2. As currently we don't support
+; masked load/store, vectorization is possible but not beneficial. If loop
+; vectorization is not enforced, LV will only do interleave.
+;   for (int i = 0; i < 1024; i++) {
+;     int tmp0 = *A++;
+;     int tmp1 = *A++;
+;     sum += tmp0 * tmp1;
+;   }
+
+; CHECK-LABEL: @ptr_ind_plus2(
+; CHECK: %[[V0:.*]] = load <8 x i32>
+; CHECK: %[[V1:.*]] = load <8 x i32>
+; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: mul nsw <4 x i32>
+; CHECK: mul nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: %index.next = add i64 %index, 8
+; CHECK: icmp eq i64 %index.next, 1024
+
+; FORCE-VEC-LABEL: @ptr_ind_plus2(
+; FORCE-VEC: %[[V:.*]] = load <4 x i32>
+; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> <i32 0, i32 2>
+; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> <i32 1, i32 3>
+; FORCE-VEC: mul nsw <2 x i32>
+; FORCE-VEC: add nsw <2 x i32>
+; FORCE-VEC: %index.next = add i64 %index, 2
+; FORCE-VEC: icmp eq i64 %index.next, 1024
+define i32 @ptr_ind_plus2(i32* %A) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr1, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1 ; address of the second element of the pair
+  %0 = load i32, i32* %A.addr, align 4 ; tmp0 = first element of the pair
+  %inc.ptr1 = getelementptr inbounds i32, i32* %A.addr, i64 2 ; the pointer advances two elements per iteration
+  %1 = load i32, i32* %inc.ptr, align 4 ; tmp1 = second element of the pair
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, %sum ; sum += tmp0 * tmp1
+  %inc = add nsw i32 %i, 1
+  %exitcond = icmp eq i32 %inc, 1024 ; leave the loop after 1024 iterations
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt < %s -loop-vectorize -mtriple=arm64-none-linux-gnu -mattr=+neon -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; Function Attrs: nounwind
+define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
+;CHECK-LABEL: array_add
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+entry:
+  %cmp10 = icmp sgt i32 %size, 0 ; the loop is bypassed when %size <= 0
+  br i1 %cmp10, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4 ; a[i]
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4 ; b[i]
+  %add = add nsw i32 %1, %0
+  %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx4, align 4 ; c[i] = a[i] + b[i]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the i64 counter so it can be compared with i32 %size
+  %exitcond = icmp eq i32 %lftr.wideiv, %size
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret i32* %c ; the destination pointer is returned as-is
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,166 @@
+; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s
+
+; The following tests contain loops for which SCEV cannot determine the backedge
+; taken count. This is because the backedge taken condition is produced by an
+; icmp with one of the sides being a loop varying non-AddRec expression.
+; However, there is a possibility to normalize this to an AddRec expression
+; using SCEV predicates. This allows us to compute a 'guarded' backedge count.
+; The Loop Vectorizer is able to version the loop in order to use this guarded
+; backedge count and vectorize more loops.
+
+
+; CHECK-LABEL: test_sge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sge(i32* noalias %A,
+                      i32* noalias %B,
+                      i32* noalias %C, i32 %N) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0 ; the loop is skipped when %N == 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] ; i16 counter, counts up from 0
+  %indvars.next = add i16 %indvars.iv, 1 ; increment carries no nuw/nsw flags
+  %indvars.ext = zext i16 %indvars.iv to i32 ; widened counter, used for indexing and for the exit test
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = mul i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+  store i32 %mul4, i32* %arrayidx7, align 4 ; A[i] = C[i] * B[i]
+
+  %exitcond = icmp sge i32 %indvars.ext, %N ; signed >= on the zero-extended counter
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: test_uge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_uge(i32* noalias %A,
+                      i32* noalias %B,
+                      i32* noalias %C, i32 %N, i32 %Offset) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0 ; the loop is skipped when %N == 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ] ; i16 counter, counts up from 0
+  %indvars.next = add i16 %indvars.iv, 1 ; increment carries no nuw/nsw flags
+
+  %indvars.ext = sext i16 %indvars.iv to i32 ; sign-extended counter
+  %indvars.access = add i32 %Offset, %indvars.ext ; array index = %Offset + widened counter
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = add i32 %1, %0 ; note: this is an add, despite the %mul4 name
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access
+  store i32 %mul4, i32* %arrayidx7, align 4
+
+  %exitcond = icmp uge i32 %indvars.ext, %N ; unsigned >= on the sign-extended counter (%Offset is not involved)
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: test_ule
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_ule(i32* noalias %A,
+                      i32* noalias %B,
+                      i32* noalias %C, i32 %N,
+                      i16 %M) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0 ; the loop is skipped when %N == 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] ; i16 counter, starts at %M
+  %indvars.next = sub i16 %indvars.iv, 1 ; counts down; no nuw/nsw flags
+  %indvars.ext = zext i16 %indvars.iv to i32 ; widened counter, used for indexing and for the exit test
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = mul i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+  store i32 %mul4, i32* %arrayidx7, align 4 ; A[i] = C[i] * B[i]
+
+  %exitcond = icmp ule i32 %indvars.ext, %N ; unsigned <= on the zero-extended counter
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sle
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sle(i32* noalias %A,
+                   i32* noalias %B,
+                   i32* noalias %C, i32 %N,
+                   i16 %M) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0 ; the loop is skipped when %N == 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ] ; i16 counter, starts at %M
+  %indvars.next = sub i16 %indvars.iv, 1 ; counts down; no nuw/nsw flags
+  %indvars.ext = sext i16 %indvars.iv to i32 ; sign-extended counter, used for indexing and for the exit test
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = mul i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+  store i32 %mul4, i32* %arrayidx7, align 4 ; A[i] = C[i] * B[i]
+
+  %exitcond = icmp sle i32 %indvars.ext, %N ; signed <= on the sign-extended counter
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,54 @@
+; RUN: opt -S < %s -loop-vectorize -instcombine 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+;; See https://llvm.org/bugs/show_bug.cgi?id=25490
+;; Due to the data structures used, the LLVM IR was not deterministic.
+;; This test comes from the PR.
+
+;; CHECK-LABEL: @test(
+; CHECK: load <16 x i8>
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: load <16 x i8>
+; CHECK-NEXT: zext <16 x i8>
+; CHECK-NEXT: zext <16 x i8>
+define void @test(i32 %n, i8* nocapture %a, i8* nocapture %b, i8* nocapture readonly %c) {
+entry:
+  %cmp.28 = icmp eq i32 %n, 0 ; nothing to do when %n == 0
+  br i1 %cmp.28, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i8, i8* %c, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32 ; c[i] widened to i32; shared by both multiplies below
+  %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
+  %1 = load i8, i8* %arrayidx2, align 1
+  %conv3 = zext i8 %1 to i32
+  %mul = mul nuw nsw i32 %conv3, %conv
+  %shr.26 = lshr i32 %mul, 8
+  %conv4 = trunc i32 %shr.26 to i8
+  store i8 %conv4, i8* %arrayidx2, align 1 ; a[i] = (a[i] * c[i]) >> 8, computed in i32
+  %arrayidx8 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
+  %2 = load i8, i8* %arrayidx8, align 1
+  %conv9 = zext i8 %2 to i32
+  %mul10 = mul nuw nsw i32 %conv9, %conv
+  %shr11.27 = lshr i32 %mul10, 8
+  %conv12 = trunc i32 %shr11.27 to i8
+  store i8 %conv12, i8* %arrayidx8, align 1 ; b[i] = (b[i] * c[i]) >> 8, computed in i32
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the i64 counter so it can be compared with i32 %n
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/gather-cost.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/gather-cost.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/gather-cost.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,85 @@
+; RUN: opt -loop-vectorize -mtriple=arm64-apple-ios -S -mcpu=cyclone -enable-interleaved-mem-accesses=false < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+
+@kernel = global [512 x float] zeroinitializer, align 16 ; coefficient tables, each indexed by the loop counter
+@kernel2 = global [512 x float] zeroinitializer, align 16
+@kernel3 = global [512 x float] zeroinitializer, align 16
+@kernel4 = global [512 x float] zeroinitializer, align 16
+@src_data = global [1536 x float] zeroinitializer, align 16 ; read at (v + offset) * 3 + {0, 1, 2}
+@r_ = global i8 0, align 1 ; the three i8 results stored at the end of @_Z4testmm
+@g_ = global i8 0, align 1
+@b_ = global i8 0, align 1
+
+; We don't want to vectorize most loops containing gathers because they are
+; expensive.
+; Make sure we don't vectorize it.
+; CHECK-NOT: x float>
+
+define void @_Z4testmm(i64 %size, i64 %offset) {
+entry:
+  %cmp53 = icmp eq i64 %size, 0 ; with %size == 0 the loop is skipped and zeros are stored
+  br i1 %cmp53, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %r.057 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add10, %for.body ]
+  %g.056 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add20, %for.body ]
+  %v.055 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ]
+  %add = add i64 %v.055, %offset
+  %mul = mul i64 %add, 3 ; base index into @src_data: (v + offset) * 3
+  %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul
+  %0 = load float, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 %v.055
+  %1 = load float, float* %arrayidx2, align 4
+  %mul3 = fmul fast float %0, %1
+  %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 %v.055
+  %2 = load float, float* %arrayidx4, align 4
+  %mul5 = fmul fast float %mul3, %2
+  %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i64 0, i64 %v.055
+  %3 = load float, float* %arrayidx6, align 4
+  %mul7 = fmul fast float %mul5, %3
+  %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 %v.055
+  %4 = load float, float* %arrayidx8, align 4
+  %mul9 = fmul fast float %mul7, %4
+  %add10 = fadd fast float %r.057, %mul9 ; r += src[base] * kernel[v] * kernel2[v] * kernel3[v] * kernel4[v]
+  %arrayidx.sum = add i64 %mul, 1
+  %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum
+  %5 = load float, float* %arrayidx11, align 4
+  %mul13 = fmul fast float %1, %5
+  %mul15 = fmul fast float %2, %mul13
+  %mul17 = fmul fast float %3, %mul15
+  %mul19 = fmul fast float %4, %mul17
+  %add20 = fadd fast float %g.056, %mul19 ; g += src[base + 1] times the same four kernel values
+  %arrayidx.sum52 = add i64 %mul, 2
+  %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52
+  %6 = load float, float* %arrayidx21, align 4
+  %mul23 = fmul fast float %1, %6
+  %mul25 = fmul fast float %2, %mul23
+  %mul27 = fmul fast float %3, %mul25
+  %mul29 = fmul fast float %4, %mul27
+  %add30 = fadd fast float %b.054, %mul29 ; b += src[base + 2] times the same four kernel values
+  %inc = add i64 %v.055, 1
+  %exitcond = icmp ne i64 %inc, %size
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+  %add30.lcssa = phi float [ %add30, %for.body ]
+  %add20.lcssa = phi float [ %add20, %for.body ]
+  %add10.lcssa = phi float [ %add10, %for.body ]
+  %phitmp = fptoui float %add10.lcssa to i8 ; the float sums are converted to i8 before being stored
+  %phitmp60 = fptoui float %add20.lcssa to i8
+  %phitmp61 = fptoui float %add30.lcssa to i8
+  br label %for.end
+
+for.end:
+  %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  store i8 %r.0.lcssa, i8* @r_, align 1
+  store i8 %g.0.lcssa, i8* @g_, align 1
+  store i8 %b.0.lcssa, i8* @b_, align 1
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt < %s -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: @non_primary_iv_trunc_free(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 5
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDUCTION]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDUCTION1]] to i32
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define void @non_primary_iv_trunc_free(i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; i64 induction variable, advanced by 5 each iteration
+  %tmp0 = trunc i64 %i to i32 ; the truncated value has no users in this function
+  %i.next = add nuw nsw i64 %i, 5
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; REQUIRES: asserts
+; RUN: opt < %s -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -S --debug-only=loop-vectorize 2>&1 | FileCheck %s
+
+; This test shows extremely high interleaving cost that, probably, should be fixed.
+; Due to the high cost, interleaving is not beneficial and the cost model chooses to scalarize
+; the load instructions.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%pair = type { i8, i8 }
+
+; CHECK-LABEL: test
+; CHECK: Found an estimated cost of 20 for VF 2 For instruction:   {{.*}} load i8
+; CHECK: Found an estimated cost of 0 for VF 2 For instruction:   {{.*}} load i8
+; CHECK: vector.body
+; CHECK: load i8
+; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
+
+define void @test(%pair* %p, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %tmp0 = getelementptr %pair, %pair* %p, i64 %i, i32 0 ; address of field 0 of p[i]
+  %tmp1 = load i8, i8* %tmp0, align 1 ; the loaded value has no users in this function
+  %tmp2 = getelementptr %pair, %pair* %p, i64 %i, i32 1 ; address of field 1 of p[i]
+  %tmp3 = load i8, i8* %tmp2, align 1 ; the loaded value has no users in this function
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp eq i64 %i.next, %n
+  br i1 %cond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,189 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_2
+; RUN: opt -loop-vectorize -force-vector-width=4 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_4
+; RUN: opt -loop-vectorize -force-vector-width=8 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_8
+; RUN: opt -loop-vectorize -force-vector-width=16 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_16
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnueabi"
+
+%i8.2 = type {i8, i8}
+define void @i8_factor_2(%i8.2* %data, i64 %n) {  ; loads, then zeroes, both i8 fields of each visited element
+entry:
+  br label %for.body
+
+; VF_8-LABEL:  Checking a loop in "i8_factor_2"
+; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
+; VF_16-LABEL: Checking a loop in "i8_factor_2"
+; VF_16:         Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 2 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]  ; element index, starting at 0
+  %tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0  ; address of field 0 of data[i]
+  %tmp1 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 1  ; address of field 1 of data[i]
+  %tmp2 = load i8, i8* %tmp0, align 1
+  %tmp3 = load i8, i8* %tmp1, align 1
+  store i8 0, i8* %tmp0, align 1
+  store i8 0, i8* %tmp1, align 1
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n  ; signed compare of the incremented index against %n
+  br i1 %cond, label %for.body, label %for.end  ; keep looping while %i.next < %n
+
+for.end:
+  ret void
+}
+
+%i16.2 = type {i16, i16}
+define void @i16_factor_2(%i16.2* %data, i64 %n) {  ; loads, then zeroes, both i16 fields of each visited element
+entry:
+  br label %for.body
+
+; VF_4-LABEL: Checking a loop in "i16_factor_2"
+; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_8-LABEL:  Checking a loop in "i16_factor_2"
+; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
+; VF_16-LABEL: Checking a loop in "i16_factor_2"
+; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]  ; element index, starting at 0
+  %tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0  ; address of field 0 of data[i]
+  %tmp1 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 1  ; address of field 1 of data[i]
+  %tmp2 = load i16, i16* %tmp0, align 2
+  %tmp3 = load i16, i16* %tmp1, align 2
+  store i16 0, i16* %tmp0, align 2
+  store i16 0, i16* %tmp1, align 2
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n  ; signed compare of the incremented index against %n
+  br i1 %cond, label %for.body, label %for.end  ; keep looping while %i.next < %n
+
+for.end:
+  ret void
+}
+
+%i32.2 = type {i32, i32}
+define void @i32_factor_2(%i32.2* %data, i64 %n) {  ; loads, then zeroes, both i32 fields of each visited element
+entry:
+  br label %for.body
+
+; VF_2-LABEL:  Checking a loop in "i32_factor_2"
+; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_4-LABEL:  Checking a loop in "i32_factor_2"
+; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_8-LABEL:  Checking a loop in "i32_factor_2"
+; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
+; VF_16-LABEL: Checking a loop in "i32_factor_2"
+; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]  ; element index, starting at 0
+  %tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0  ; address of field 0 of data[i]
+  %tmp1 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 1  ; address of field 1 of data[i]
+  %tmp2 = load i32, i32* %tmp0, align 4
+  %tmp3 = load i32, i32* %tmp1, align 4
+  store i32 0, i32* %tmp0, align 4
+  store i32 0, i32* %tmp1, align 4
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n  ; signed compare of the incremented index against %n
+  br i1 %cond, label %for.body, label %for.end  ; keep looping while %i.next < %n
+
+for.end:
+  ret void
+}
+
+%i64.2 = type {i64, i64}
+define void @i64_factor_2(%i64.2* %data, i64 %n) {  ; loads, then zeroes, both i64 fields of each visited element
+entry:
+  br label %for.body
+
+; VF_2-LABEL:  Checking a loop in "i64_factor_2"
+; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_4-LABEL:  Checking a loop in "i64_factor_2"
+; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_8-LABEL:  Checking a loop in "i64_factor_2"
+; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8
+; VF_16-LABEL: Checking a loop in "i64_factor_2"
+; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]  ; element index, starting at 0
+  %tmp0 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 0  ; address of field 0 of data[i]
+  %tmp1 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 1  ; address of field 1 of data[i]
+  %tmp2 = load i64, i64* %tmp0, align 8
+  %tmp3 = load i64, i64* %tmp1, align 8
+  store i64 0, i64* %tmp0, align 8
+  store i64 0, i64* %tmp1, align 8
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n  ; signed compare of the incremented index against %n
+  br i1 %cond, label %for.body, label %for.end  ; keep looping while %i.next < %n
+
+for.end:
+  ret void
+}
+
+%i64.8 = type {i64, i64, i64, i64, i64, i64, i64, i64}
+define void @i64_factor_8(%i64.8* %data, i64 %n) {  ; touches only fields 2 and 6 of each 8-field element
+entry:
+  br label %for.body
+
+; The interleave factor in this test is 8, which is greater than the maximum
+; allowed factor for AArch64 (4). Thus, we will fall back to the basic TTI
+; implementation for determining the cost of the interleaved load group. The
+; stores do not form a legal interleaved group because the group would contain
+; gaps.
+;
+; VF_2-LABEL: Checking a loop in "i64_factor_8"
+; VF_2:         Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
+; VF_2-NEXT:    Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
+; VF_2-NEXT:    Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
+; VF_2-NEXT:    Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]  ; element index, starting at 0
+  %tmp0 = getelementptr inbounds %i64.8, %i64.8* %data, i64 %i, i32 2  ; address of field 2 of data[i]
+  %tmp1 = getelementptr inbounds %i64.8, %i64.8* %data, i64 %i, i32 6  ; address of field 6 of data[i]
+  %tmp2 = load i64, i64* %tmp0, align 8
+  %tmp3 = load i64, i64* %tmp1, align 8
+  store i64 0, i64* %tmp0, align 8
+  store i64 0, i64* %tmp1, align 8
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n  ; signed compare of the incremented index against %n
+  br i1 %cond, label %for.body, label %for.end  ; keep looping while %i.next < %n
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,310 @@
+; RUN: opt -S < %s -basicaa -loop-vectorize -force-vector-interleave=1 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; CHECK-LABEL: @add_a(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add <16 x i8>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {  ; q[i] = p[i] + 2, with the add written in i32
+entry:
+  %cmp8 = icmp sgt i32 %len, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup  ; enter the loop only when %len > 0
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx
+  %conv = zext i8 %0 to i32  ; widen the loaded byte to i32
+  %add = add nuw nsw i32 %conv, 2
+  %conv1 = trunc i32 %add to i8  ; narrow the sum back to i8 before storing
+  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+  store i8 %conv1, i8* %arrayidx3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; 32-bit view of the incremented index
+  %exitcond = icmp eq i32 %lftr.wideiv, %len  ; true once the truncated index equals %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Ensure that we preserve nuw/nsw if we're not shrinking the values we're
+; working with.
+; CHECK-LABEL: @add_a1(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add nuw nsw <16 x i8>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define void @add_a1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {  ; q[i] = p[i] + 2, computed directly in i8
+entry:
+  %cmp8 = icmp sgt i32 %len, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup  ; enter the loop only when %len > 0
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx
+  %add = add nuw nsw i8 %0, 2  ; the add is already in i8, so there is no widening or narrowing here
+  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+  store i8 %add, i8* %arrayidx3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; 32-bit view of the incremented index
+  %exitcond = icmp eq i32 %lftr.wideiv, %len  ; true once the truncated index equals %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_b(
+; CHECK: load <8 x i16>, <8 x i16>*
+; CHECK: add <8 x i16>
+; CHECK: store <8 x i16>
+; Function Attrs: nounwind
+define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {  ; q[i] = p[i] + 2 on i16 data, with the add written in i32
+entry:
+  %cmp9 = icmp sgt i32 %len, 0
+  br i1 %cmp9, label %for.body, label %for.cond.cleanup  ; enter the loop only when %len > 0
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
+  %0 = load i16, i16* %arrayidx
+  %conv8 = zext i16 %0 to i32  ; widen the loaded i16 to i32
+  %add = add nuw nsw i32 %conv8, 2
+  %conv1 = trunc i32 %add to i16  ; narrow the sum back to i16 before storing
+  %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
+  store i16 %conv1, i16* %arrayidx3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; 32-bit view of the incremented index
+  %exitcond = icmp eq i32 %lftr.wideiv, %len  ; true once the truncated index equals %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_c(
+; CHECK: load <8 x i8>, <8 x i8>*
+; CHECK: add <8 x i16>
+; CHECK: store <8 x i16>
+; Function Attrs: nounwind
+define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {  ; reads i8 from %p, adds 2 in i32, stores i16 to %q
+entry:
+  %cmp8 = icmp sgt i32 %len, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup  ; enter the loop only when %len > 0
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx
+  %conv = zext i8 %0 to i32  ; widen the loaded byte to i32
+  %add = add nuw nsw i32 %conv, 2
+  %conv1 = trunc i32 %add to i16  ; the stored type (i16) is wider than the loaded type (i8)
+  %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
+  store i16 %conv1, i16* %arrayidx3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; 32-bit view of the incremented index
+  %exitcond = icmp eq i32 %lftr.wideiv, %len  ; true once the truncated index equals %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_d(
+; CHECK: load <4 x i16>
+; CHECK: add nsw <4 x i32>
+; CHECK: store <4 x i32>
+define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 {  ; q[i] = sext(p[i]) + 2, stored as i32
+entry:
+  %cmp7 = icmp sgt i32 %len, 0
+  br i1 %cmp7, label %for.body, label %for.cond.cleanup  ; enter the loop only when %len > 0
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
+  %0 = load i16, i16* %arrayidx
+  %conv = sext i16 %0 to i32  ; sign-extend the loaded i16 to i32
+  %add = add nsw i32 %conv, 2  ; the i32 sum is stored as-is, with no truncation
+  %arrayidx2 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; 32-bit view of the incremented index
+  %exitcond = icmp eq i32 %lftr.wideiv, %len  ; true once the truncated index equals %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_e(
+; CHECK: load <16 x i8>
+; CHECK: shl <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: or <16 x i8>
+; CHECK: mul <16 x i8>
+; CHECK: and <16 x i8>
+; CHECK: xor <16 x i8>
+; CHECK: mul <16 x i8>
+; CHECK: store <16 x i8>
+define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {  ; i8 in, i8 out; the arithmetic chain in between is written in i32
+entry:
+  %cmp.32 = icmp sgt i32 %len, 0
+  br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup  ; enter the loop only when %len > 0
+
+for.body.lr.ph:                                   ; preds = %entry
+  %conv11 = zext i8 %arg2 to i32  ; loop-invariant: %arg2 widened once, outside the loop
+  %conv13 = zext i8 %arg1 to i32  ; loop-invariant: %arg1 widened once, outside the loop
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx
+  %conv = zext i8 %0 to i32  ; widen the loaded byte to i32
+  %add = shl i32 %conv, 4  ; despite its name, %add is a left shift by 4
+  %conv2 = add nuw nsw i32 %add, 32
+  %or = or i32 %conv, 51
+  %mul = mul nuw nsw i32 %or, 60
+  %and = and i32 %conv2, %conv13
+  %mul.masked = and i32 %mul, 252
+  %conv17 = xor i32 %mul.masked, %conv11
+  %mul18 = mul nuw nsw i32 %conv17, %and
+  %conv19 = trunc i32 %mul18 to i8  ; only the low 8 bits of the product are stored
+  %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+  store i8 %conv19, i8* %arrayidx21
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; 32-bit view of the incremented index
+  %exitcond = icmp eq i32 %lftr.wideiv, %len  ; true once the truncated index equals %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_f
+; CHECK: load <8 x i16>
+; CHECK: trunc <8 x i16>
+; CHECK: shl <8 x i8>
+; CHECK: add <8 x i8>
+; CHECK: or <8 x i8>
+; CHECK: mul <8 x i8>
+; CHECK: and <8 x i8>
+; CHECK: xor <8 x i8>
+; CHECK: mul <8 x i8>
+; CHECK: store <8 x i8>
+define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {  ; i16 in, i8 out; the arithmetic chain in between is written in i32
+entry:
+  %cmp.32 = icmp sgt i32 %len, 0
+  br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup  ; enter the loop only when %len > 0
+
+for.body.lr.ph:                                   ; preds = %entry
+  %conv11 = zext i8 %arg2 to i32  ; loop-invariant: %arg2 widened once, outside the loop
+  %conv13 = zext i8 %arg1 to i32  ; loop-invariant: %arg1 widened once, outside the loop
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
+  %0 = load i16, i16* %arrayidx
+  %conv = sext i16 %0 to i32  ; sign-extend the loaded i16 to i32
+  %add = shl i32 %conv, 4  ; despite its name, %add is a left shift by 4
+  %conv2 = add nsw i32 %add, 32
+  %or = and i32 %conv, 204  ; despite its name, %or is an AND with 204
+  %conv8 = or i32 %or, 51
+  %mul = mul nuw nsw i32 %conv8, 60
+  %and = and i32 %conv2, %conv13
+  %mul.masked = and i32 %mul, 252
+  %conv17 = xor i32 %mul.masked, %conv11
+  %mul18 = mul nuw nsw i32 %conv17, %and
+  %conv19 = trunc i32 %mul18 to i8  ; only the low 8 bits of the product are stored
+  %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+  store i8 %conv19, i8* %arrayidx21
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; 32-bit view of the incremented index
+  %exitcond = icmp eq i32 %lftr.wideiv, %len  ; true once the truncated index equals %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_phifail(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add nuw nsw <16 x i32>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define void @add_phifail(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {  ; same loop as @add_a plus a phi that carries the widened value
+entry:
+  %cmp8 = icmp sgt i32 %len, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup  ; enter the loop only when %len > 0
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]  ; previous iteration's %conv (0 on entry); it has no user in this function
+  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx
+  %conv = zext i8 %0 to i32  ; feeds both the add below and %a_phi on the back edge
+  %add = add nuw nsw i32 %conv, 2
+  %conv1 = trunc i32 %add to i8  ; narrow the sum back to i8 before storing
+  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+  store i8 %conv1, i8* %arrayidx3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; 32-bit view of the incremented index
+  %exitcond = icmp eq i32 %lftr.wideiv, %len  ; true once the truncated index equals %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Function Attrs: nounwind
+; When we vectorize this loop, we generate correct code
+; even when %len is an exact multiple of VF (since we extract from the second last index
+; and pass this to the for.cond.cleanup block). Vectorized loop returns
+; the correct value a_phi = p[len - 2]
+define i8 @add_phifail2(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {  ; like @add_phifail, but %a_phi is used after the loop and returned
+; CHECK-LABEL: @add_phifail2(
+; CHECK: vector.body:
+; CHECK:   %wide.load = load <16 x i8>, <16 x i8>*
+; CHECK:   %[[L1:.+]] = zext <16 x i8> %wide.load to <16 x i32>
+; CHECK:   add nuw nsw <16 x i32>
+; CHECK:   store <16 x i8>
+; CHECK:   add i64 %index, 16
+; CHECK:   icmp eq i64 %index.next, %n.vec
+; CHECK: middle.block:
+; CHECK:   %vector.recur.extract = extractelement <16 x i32> %[[L1]], i32 15
+; CHECK:   %vector.recur.extract.for.phi = extractelement <16 x i32> %[[L1]], i32 14
+; CHECK: for.cond.cleanup:
+; CHECK:   %a_phi.lcssa = phi i32 [ %scalar.recur, %for.body ], [ %vector.recur.extract.for.phi, %middle.block ]
+; CHECK:   %ret = trunc i32 %a_phi.lcssa to i8
+; CHECK:   ret i8 %ret
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  %ret = trunc i32 %a_phi to i8  ; %a_phi is read here, outside the loop that defines it
+  ret i8 %ret
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]  ; previous iteration's %conv (0 on entry)
+  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx
+  %conv = zext i8 %0 to i32  ; feeds both the add below and %a_phi on the back edge
+  %add = add nuw nsw i32 %conv, 2
+  %conv1 = trunc i32 %add to i8  ; narrow the sum back to i8 before storing
+  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+  store i8 %conv1, i8* %arrayidx3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; 32-bit view of the incremented index
+  %exitcond = icmp eq i32 %lftr.wideiv, %len  ; true once the truncated index equals %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind }
+

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt < %s -force-vector-interleave=1 -store-to-load-forwarding-conflict-detection=false -loop-vectorize -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.pair = type { i32, i32 }
+
+; Check vectorization of interleaved access groups with positive dependence
+; distances. In this test, the maximum safe dependence distance for
+; vectorization is 16 bytes. Normally, this would lead to a maximum VF of 4.
+; However, for interleaved groups, the effective VF is VF * IF, where IF is the
+; interleave factor. Here, the maximum safe dependence distance is recomputed
+; as 16 / IF bytes, resulting in VF=2. Since IF=2, we should generate <4 x i32>
+; loads and stores instead of <8 x i32> accesses.
+;
+; Note: LAA's conflict detection optimization has to be disabled for this test
+;       to be vectorized.
+
+; struct pair {
+;   int x;
+;   int y;
+; };
+;
+; void max_vf(struct pair *restrict p) {
+;   for (int i = 0; i < 1000; i++) {
+;     p[i + 2].x = p[i].x;
+;     p[i + 2].y = p[i].y;
+;   }
+; }
+
+; CHECK-LABEL: @max_vf
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+
+define void @max_vf(%struct.pair* noalias nocapture %p) {  ; copies both fields of p[i] into p[i + 2] until the index reaches 1000
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]  ; element index, starting at 0
+  %0 = add nuw nsw i64 %i, 2  ; destination index, i + 2
+  %p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0
+  %p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0
+  %1 = load i32, i32* %p_i.x, align 4
+  store i32 %1, i32* %p_i_plus_2.x, align 4  ; p[i + 2] field 0 = p[i] field 0
+  %p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1
+  %p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1
+  %2 = load i32, i32* %p_i.y, align 4
+  store i32 %2, i32* %p_i_plus_2.y, align 4  ; p[i + 2] field 1 = p[i] field 1
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp eq i64 %i.next, 1000  ; constant trip limit of 1000
+  br i1 %cond, label %for.exit, label %for.body
+
+for.exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -S -debug-only=loop-vectorize 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: all_scalar
+; CHECK:       LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
+; CHECK:       LV: Found an estimated cost of 2 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2
+; CHECK:       LV: Not considering vector loop of width 2 because it will not generate any vector instructions
+;
+define void @all_scalar(i64* %a, i64 %n) {  ; stores 0 to a[i] while the index advances by 2 per iteration
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]  ; index, starting at 0
+  %tmp0 = getelementptr i64, i64* %a, i64 %i  ; address of a[i]
+  store i64 0, i64* %tmp0, align 1  ; note the i64 store is given only 1-byte alignment
+  %i.next = add nuw nsw i64 %i, 2  ; step of 2, so every other element is skipped
+  %cond = icmp eq i64 %i.next, %n  ; true once the incremented index equals %n
+  br i1 %cond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: PR33193
+; CHECK:       LV: Found scalar instruction: %i.next = zext i32 %j.next to i64
+; CHECK:       LV: Found an estimated cost of 0 for VF 8 For instruction: %i.next = zext i32 %j.next to i64
+; CHECK:       LV: Not considering vector loop of width 8 because it will not generate any vector instructions
+%struct.a = type { i32, i8 }
+define void @PR33193(%struct.a* %a, i64 %n) {  ; zeroes the i8 field of a[i]; the i64 index is derived from an i32 counter
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]  ; 64-bit index used for addressing
+  %j = phi i32 [ 0, %entry ], [ %j.next, %for.body ]  ; 32-bit counter that the index is computed from
+  %tmp0 = getelementptr inbounds %struct.a, %struct.a* %a, i64 %i, i32 1  ; address of field 1 (the i8) of a[i]
+  store i8 0, i8* %tmp0, align 4
+  %j.next = add i32 %j, 1  ; no nuw/nsw flags on this increment
+  %i.next = zext i32 %j.next to i64  ; next index is the zero-extended 32-bit counter
+  %cond = icmp ugt i64 %n, %i.next  ; unsigned compare: %n > %i.next
+  br i1 %cond, label %for.body, label %for.end  ; keep looping while %n > %i.next
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,144 @@
+; RUN: opt -S -loop-vectorize -enable-vplan-native-path -mtriple aarch64-gnu-linux < %s | FileCheck %s
+
+; extern int arr[8][8];
+; extern int arr2[8];
+;
+; void foo(int n)
+; {
+;   int i1, i2;
+;
+; #pragma clang loop vectorize(enable)
+;   for (i1 = 0; i1 < 8; i1++) {
+;     arr2[i1] = i1;
+;     for (i2 = 0; i2 < 8; i2++)
+;       arr[i2][i1] = i1 + n;
+;   }
+; }
+;
+
+; CHECK-LABEL: @foo_i32(
+; CHECK-LABEL: vector.ph:
+; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0
+; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer
+
+; CHECK-LABEL: vector.body:
+; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
+; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <4 x i64> %[[VecInd]]
+; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[VecIndTr]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
+; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]
+; CHECK: br label %[[InnerLoop:.+]]
+
+; CHECK: [[InnerLoop]]:
+; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ], [ zeroinitializer, %vector.body ]
+; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StoreVal]], <4 x i32*> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true
+; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>
+; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0
+; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]
+
+; CHECK: [[ForInc]]:
+; CHECK: %[[IndNext]] = add i64 %[[Ind]], 4
+; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
+; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body
+
+@arr2 = external global [8 x i32], align 16           ; written by the outer loop of @foo_i32
+@arr = external global [8 x [8 x i32]], align 16      ; written by the inner loop of @foo_i32
+
+@arrX = external global [8 x i64], align 16           ; i64 counterpart of @arr2, written by @foo_i64
+@arrY = external global [8 x [8 x i64]], align 16     ; i64 counterpart of @arr, written by @foo_i64
+
+; Function Attrs: norecurse nounwind uwtable
+define void @foo_i32(i32 %n) {                        ; i32 version of the loop nest in the C sketch at the top of this test
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc8, %entry
+  %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]  ; outer induction variable (i1 in the C sketch)
+  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21
+  %0 = trunc i64 %indvars.iv21 to i32
+  store i32 %0, i32* %arrayidx, align 4                 ; arr2[i1] = i1
+  %1 = trunc i64 %indvars.iv21 to i32
+  %add = add nsw i32 %1, %n                             ; i1 + n, computed once per outer iteration
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]  ; inner induction variable (i2 in the C sketch)
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+  store i32 %add, i32* %arrayidx7, align 4              ; arr[i2][i1] = i1 + n
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8:                                         ; preds = %for.body3
+  %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+  %exitcond23 = icmp eq i64 %indvars.iv.next22, 8
+  br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1  ; outer-loop back edge; !1 carries llvm.loop.vectorize.enable = true
+
+for.end10:                                        ; preds = %for.inc8
+  ret void
+}
+
+; CHECK-LABEL: @foo_i64(
+; CHECK-LABEL: vector.ph:
+; CHECK: %[[SplatVal:.*]] = insertelement <2 x i64> undef, i64 %n, i32 0
+; CHECK: %[[Splat:.*]] = shufflevector <2 x i64> %[[SplatVal]], <2 x i64> undef, <2 x i32> zeroinitializer
+
+; CHECK-LABEL: vector.body:
+; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
+; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, <2 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[VecInd]], <2 x i64*> %[[AAddr]], i32 4, <2 x i1> <i1 true, i1 true>)
+; CHECK: %[[StoreVal:.*]] = add nsw <2 x i64> %[[VecInd]], %[[Splat]]
+; CHECK: br label %[[InnerLoop:.+]]
+
+; CHECK: [[InnerLoop]]:
+; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ], [ zeroinitializer, %vector.body ]
+; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[StoreVal]], <2 x i64*> %[[AAddr2]], i32 4, <2 x i1> <i1 true, i1 true>
+; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], <i64 1, i64 1>
+; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], <i64 8, i64 8>
+; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0
+; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]
+
+; CHECK: [[ForInc]]:
+; CHECK: %[[IndNext]] = add i64 %[[Ind]], 2
+; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], <i64 2, i64 2>
+; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
+; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body
+; Function Attrs: norecurse nounwind uwtable
+define void @foo_i64(i64 %n) {                        ; i64 variant of @foo_i32: same loop nest over @arrX / @arrY, no trunc of the index
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc8, %entry
+  %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]  ; outer induction variable
+  %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, i64 %indvars.iv21
+  store i64 %indvars.iv21, i64* %arrayidx, align 4      ; arrX[i1] = i1
+  %add = add nsw i64 %indvars.iv21, %n                  ; i1 + n, computed once per outer iteration
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]  ; inner induction variable
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+  store i64 %add, i64* %arrayidx7, align 4              ; arrY[i2][i1] = i1 + n
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8:                                         ; preds = %for.body3
+  %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+  %exitcond23 = icmp eq i64 %indvars.iv.next22, 8
+  br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1  ; outer-loop back edge; !1 carries llvm.loop.vectorize.enable = true
+
+for.end10:                                        ; preds = %for.inc8
+  ret void
+}
+
+
+!1 = distinct !{!1, !2}                               ; loop ID attached to the outer-loop back-edge branch in both @foo_i32 and @foo_i64
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}        ; IR form of the `#pragma clang loop vectorize(enable)` shown in the C sketch

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr31900.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr31900.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr31900.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr31900.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt -S -mtriple=aarch64-apple-ios -loop-vectorize -enable-interleaved-mem-accesses -force-vector-width=2 < %s | FileCheck %s
+
+; Reproducer for address space fault in the LoopVectorizer (pr31900). Added
+; different sized address space pointers (p:16:16-p4:32:16) to the aarch64
+; datalayout to reproduce the fault.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128-p:16:16-p4:32:16"
+
+; Check that all the loads are scalarized
+; CHECK: load i16, i16*
+; CHECK: load i16, i16*
+; CHECK: load i16, i16 addrspace(4)*
+; CHECK: load i16, i16 addrspace(4)*
+
+%rec1445 = type { i16, i16, i16, i16, i16 }           ; record of five i16 fields
+
+define void @foo() {                                  ; each iteration loads field 1 of a %rec1445 via a default-address-space pointer and via an addrspace(4) pointer
+bb1:
+  br label %bb4
+
+bb4:
+  %tmp1 = phi i16 [ undef, %bb1 ], [ %_tmp1013, %bb4 ]                       ; i16 counter, initial value undef
+  %tmp2 = phi %rec1445* [ undef, %bb1 ], [ %_tmp1015, %bb4 ]                 ; default-address-space record pointer, initial value undef
+  %tmp3 = phi %rec1445 addrspace(4)* [ undef, %bb1 ], [ %_tmp1017, %bb4 ]    ; addrspace(4) record pointer, initial value undef
+  %0 = getelementptr %rec1445, %rec1445* %tmp2, i16 0, i32 1                 ; i16 index: the datalayout gives default pointers 16 bits (p:16:16)
+  %_tmp987 = load i16, i16* %0, align 1                                      ; result is not used by any later instruction
+  %1 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 0, i32 1    ; i32 index: the datalayout gives addrspace(4) pointers 32 bits (p4:32:16)
+  %_tmp993 = load i16, i16 addrspace(4)* %1, align 1                         ; result is not used by any later instruction
+  %_tmp1013 = add i16 %tmp1, 1
+  %_tmp1015 = getelementptr %rec1445, %rec1445* %tmp2, i16 1                 ; advance to the next record
+  %_tmp1017 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 1    ; advance to the next record
+  %_tmp1019 = icmp ult i16 %_tmp1013, 24                                     ; keep looping while the incremented counter is <u 24
+  br i1 %_tmp1019, label %bb4, label %bb16
+
+bb16:
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr33053.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr33053.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr33053.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/pr33053.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt -S -mtriple=aarch64 -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+@b = common local_unnamed_addr global i32 0, align 4       ; i32 loaded by @fn1 for its entry guard and its loop bound
+@a = common local_unnamed_addr global i16* null, align 8   ; holds the i16* base pointer that @fn1 reads elements through
+
+; Function Attrs: norecurse nounwind readonly
+define i32 @fn1() local_unnamed_addr #0 {             ; two i16 select-based reductions (signed max and signed min) over the array pointed to by @a, bounded by @b
+; Ensure that we don't emit reduction intrinsics for unsupported short reductions.
+; CHECK-NOT: @llvm.experimental.vector.reduce
+entry:
+  %0 = load i32, i32* @b, align 4, !tbaa !1
+  %cmp40 = icmp sgt i32 %0, 0                         ; guard: the loop is skipped when @b <= 0
+  br i1 %cmp40, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %1 = load i16*, i16** @a, align 8, !tbaa !5
+  %2 = load i32, i32* @b, align 4, !tbaa !1           ; @b is loaded a second time here for the loop bound
+  %3 = sext i32 %2 to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %d.043 = phi i16 [ undef, %for.body.lr.ph ], [ %.sink28, %for.body ]  ; running signed minimum; starts from undef
+  %c.042 = phi i16 [ undef, %for.body.lr.ph ], [ %c.0., %for.body ]     ; running signed maximum; starts from undef
+  %arrayidx = getelementptr inbounds i16, i16* %1, i64 %indvars.iv
+  %4 = load i16, i16* %arrayidx, align 2, !tbaa !7
+  %cmp2 = icmp sgt i16 %c.042, %4
+  %c.0. = select i1 %cmp2, i16 %c.042, i16 %4         ; signed max(%c.042, loaded element)
+  %cmp13 = icmp slt i16 %d.043, %4
+  %.sink28 = select i1 %cmp13, i16 %d.043, i16 %4     ; signed min(%d.043, loaded element)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp = icmp slt i64 %indvars.iv.next, %3
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  %c.0.lcssa = phi i16 [ undef, %entry ], [ %c.0., %for.body ]
+  %d.0.lcssa = phi i16 [ undef, %entry ], [ %.sink28, %for.body ]
+  %cmp26 = icmp sgt i16 %c.0.lcssa, %d.0.lcssa
+  %conv27 = zext i1 %cmp26 to i32
+  ret i32 %conv27                                     ; 1 when the final max is signed-greater than the final min, else 0
+}
+
+attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } ; attribute group referenced by @fn1
+!llvm.ident = !{!0}
+
+!0 = !{!"clang"}
+!1 = !{!2, !2, i64 0}                                 ; TBAA tag on the i32 loads of @b
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}                 ; parent of the int, any pointer and short nodes
+!4 = !{!"Simple C/C++ TBAA"}                          ; referenced only by !3
+!5 = !{!6, !6, i64 0}                                 ; TBAA tag on the load of the i16* stored in @a
+!6 = !{!"any pointer", !3, i64 0}
+!7 = !{!8, !8, i64 0}                                 ; TBAA tag on the i16 element load inside the loop
+!8 = !{!"short", !3, i64 0}