[llvm] r358552 - Revert "Temporarily Revert "Add basic loop fusion pass.""

Tue Apr 16 21:53:01 PDT 2019

Added: llvm/trunk/test/Transforms/LoopVectorize/pr25281.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr25281.ll?rev=358552&view=auto
==============================================================================

--- llvm/trunk/test/Transforms/LoopVectorize/pr25281.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr25281.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,58 @@
+; RUN: opt < %s  -scev-aa -loop-vectorize -print-alias-sets -S  -o - 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; PR25281
+; Just check that we don't crash on this test.
+; CHECK-LABEL: @foo
+define void @foo(float** noalias nocapture readonly %in, i32* noalias nocapture readonly %isCompressed, float* noalias nocapture readonly %out) {
+entry_block:                                      ; loads in[0], in[1], isCompressed[0] and isCompressed[1] once, ahead of the loop nest
+  %tmp = getelementptr float*, float** %in, i32 0
+  %in_0 = load float*, float** %tmp, !alias.scope !0
+  %tmp1 = getelementptr i32, i32* %isCompressed, i32 0
+  %isCompressed_0 = load i32, i32* %tmp1, !alias.scope !1
+  %tmp2 = getelementptr float*, float** %in, i32 1
+  %in_1 = load float*, float** %tmp2, !alias.scope !2
+  %tmp3 = getelementptr i32, i32* %isCompressed, i32 1
+  %isCompressed_1 = load i32, i32* %tmp3, !alias.scope !3
+  br label %for_each_frames
+
+for_each_frames:                                  ; outer loop: %frameIndex starts at 0 and advances by 2
+  %frameIndex = phi i32 [ 0, %entry_block ], [ %nextFrameIndex, %for_each_frames_end ]
+  %nextFrameIndex = add nuw nsw i32 %frameIndex, 2
+  br label %for_each_channel
+
+for_each_channel:                                 ; inner loop: %channelIndex takes the values 0 and 1
+  %channelIndex = phi i32 [ 0, %for_each_frames ], [ %nextChannelIndex, %for_each_channel ]
+  %nextChannelIndex = add nuw nsw i32 %channelIndex, 1
+  %tmp4 = add i32 %frameIndex, %channelIndex  ; index used for the store through %out below
+  %tmp5 = xor i32 %isCompressed_0, 1  ; toggles bit 0 of isCompressed[0]
+  %tmp6 = mul i32 %frameIndex, %tmp5
+  %offset0 = add i32 %tmp6, %channelIndex  ; frameIndex * (isCompressed[0] ^ 1) + channelIndex
+  %tmp7 = getelementptr float, float* %in_0, i32 %offset0
+  %in_0_index = load float, float* %tmp7, align 4, !alias.scope !4
+  %tmp8 = xor i32 %isCompressed_1, 1  ; toggles bit 0 of isCompressed[1]
+  %tmp9 = mul i32 %frameIndex, %tmp8
+  %offset1 = add i32 %tmp9, %channelIndex  ; frameIndex * (isCompressed[1] ^ 1) + channelIndex
+  %tmp10 = getelementptr float, float* %in_1, i32 %offset1
+  %in_1_index = load float, float* %tmp10, align 4, !alias.scope !5
+  %tmp11 = fadd float %in_0_index, %in_1_index
+  %tmp12 = getelementptr float, float* %out, i32 %tmp4
+  store float %tmp11, float* %tmp12, align 4, !alias.noalias !6  ; NOTE(review): %out is declared readonly in the signature yet is stored through here - confirm this is intentional for the reproducer
+  %tmp13 = icmp eq i32 %nextChannelIndex, 2  ; inner loop exits after two iterations
+  br i1 %tmp13, label %for_each_frames_end, label %for_each_channel
+
+for_each_frames_end:
+  %tmp14 = icmp eq i32 %nextFrameIndex, 512  ; outer loop exits once the next frame index equals 512
+  br i1 %tmp14, label %return, label %for_each_frames
+
+return:
+  ret void
+}
+
+!0 = distinct !{!0}
+!1 = distinct !{!1, !0}
+!2 = distinct !{!2, !0}
+!3 = distinct !{!3, !0}
+!4 = distinct !{!4, !0}
+!5 = distinct !{!5, !0}
+!6 = !{!2, !3, !4, !5, !1}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr28541.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr28541.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr28541.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr28541.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,71 @@
+; RUN: opt -loop-vectorize -pass-remarks=loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+; FIXME: Check for -pass-remarks-missed and -pass-remarks-analysis output when
+; addAcyclicInnerLoop emits analysis.
+
+; Check that opt does not crash on such input:
+;
+; a, b, c;
+; fn1() {
+;   while (b--) {
+;     c = a;
+;     switch (a & 3)
+;     case 0:
+;       do
+;     case 3:
+;     case 2:
+;     case 1:
+;         ;
+;         while (--c)
+;           ;
+;   }
+; }
+
+@b = common global i32 0, align 4
+@a = common global i32 0, align 4
+@c = common global i32 0, align 4
+
+; CHECK-NOT: vectorized loop
+; CHECK-LABEL: fn1
+
+define i32 @fn1() {
+entry:                                            ; post-decrements @b; the loop nest is skipped when the old value was 0
+  %tmp2 = load i32, i32* @b, align 4
+  %dec3 = add nsw i32 %tmp2, -1
+  store i32 %dec3, i32* @b, align 4
+  %tobool4 = icmp eq i32 %tmp2, 0
+  br i1 %tobool4, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %entry
+  %tmp1 = load i32, i32* @a, align 4
+  %and = and i32 %tmp1, 3
+  %switch = icmp eq i32 %and, 0  ; true when (a & 3) == 0; computed once, outside the loops
+  br label %while.body
+
+while.cond:                                       ; preds = %do.cond
+  %dec = add nsw i32 %dec7, -1
+  %tobool = icmp eq i32 %dec7, 0  ; tests the value before the decrement
+  br i1 %tobool, label %while.cond.while.end_crit_edge, label %while.body
+
+while.body:                                       ; preds = %while.body.lr.ph, %while.cond
+  %dec7 = phi i32 [ %dec3, %while.body.lr.ph ], [ %dec, %while.cond ]
+  br i1 %switch, label %do.body, label %do.cond  ; the do.body/do.cond cycle can be entered at either of its two blocks from here
+
+do.body:                                          ; preds = %do.cond, %while.body
+  %dec25 = phi i32 [ %dec2, %do.cond ], [ %tmp1, %while.body ]
+  br label %do.cond
+
+do.cond:                                          ; preds = %do.body, %while.body
+  %dec26 = phi i32 [ %dec25, %do.body ], [ %tmp1, %while.body ]
+  %dec2 = add nsw i32 %dec26, -1
+  %tobool3 = icmp eq i32 %dec2, 0  ; inner cycle ends when the decremented counter reaches 0
+  br i1 %tobool3, label %while.cond, label %do.body
+
+while.cond.while.end_crit_edge:                   ; preds = %while.cond
+  store i32 0, i32* @c, align 4
+  store i32 -1, i32* @b, align 4
+  br label %while.end
+
+while.end:                                        ; preds = %while.cond.while.end_crit_edge, %entry
+  ret i32 undef
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,241 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that the vectorizer identifies the %p.09 phi,
+; as an induction variable, despite the potential overflow
+; due to the truncation from 32bit to 8bit. 
+; SCEV will detect the pattern "sext(trunc(%p.09)) + %step"
+; and generate the required runtime checks under which
+; we can assume no overflow. We check here that we generate
+; exactly two runtime checks:
+; 1) an overflow check:
+;    {0,+,(trunc i32 %step to i8)}<%for.body> Added Flags: <nssw>
+; 2) an equality check verifying that the step of the induction 
+;    is equal to sext(trunc(step)): 
+;    Equal predicate: %step == (sext i8 (trunc i32 %step to i8) to i32)
+; 
+; See also pr30654.
+;
+; int a[N];
+; void doit1(int n, int step) {
+;   int i;
+;   char p = 0;
+;   for (i = 0; i < n; i++) {
+;      a[i] = p;
+;      p = p + step;
+;   }
+; }
+; 
+
+; CHECK-LABEL: @doit1
+; CHECK: vector.scevcheck
+; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow
+; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]]
+; CHECK: %ident.check = icmp ne i32 {{.*}}, %{{.*}}
+; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: vector.body:
+; CHECK: <4 x i32>
+
+@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
+entry:                                            ; the loop is skipped unless %n > 0 (signed compare)
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:                    ; trip count is %n zero-extended to i64
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:                  ; stores the narrowed %p.09 to @a[%indvars.iv], then adds %step to it
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %sext = shl i32 %p.09, 24
+  %conv = ashr exact i32 %sext, 24  ; shl + ashr by 24 sign-extends the low 8 bits of %p.09
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step  ; next value of %p.09; %step is loop-invariant here
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                        ; dedicated exit block that only forwards to for.end
+  br label %for.end
+
+for.end:                                ; reached from the loop exit and from the %n <= 0 bypass
+  ret void
+}
+
+; Same as above, but for checking the SCEV "zext(trunc(%p.09)) + %step".
+; Here we expect the following two predicates to be added for runtime checking:
+; 1) {0,+,(trunc i32 %step to i8)}<%for.body> Added Flags: <nusw>
+; 2) Equal predicate: %step == (sext i8 (trunc i32 %step to i8) to i32)
+;
+; int a[N];
+; void doit2(int n, int step) {
+;   int i;
+;   unsigned char p = 0;
+;   for (i = 0; i < n; i++) {
+;      a[i] = p;
+;      p = p + step;
+;   }
+; }
+; 
+
+; CHECK-LABEL: @doit2
+; CHECK: vector.scevcheck
+; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow
+; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]]
+; CHECK: %[[EXT:[0-9]+]] = sext i8 {{.*}} to i32
+; CHECK: %ident.check = icmp ne i32 {{.*}}, %[[EXT]]
+; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: vector.body:
+; CHECK: <4 x i32>
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit2(i32 %n, i32 %step) local_unnamed_addr  {
+entry:                                            ; the loop is skipped unless %n > 0 (signed compare)
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:                             ; trip count is %n zero-extended to i64
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:                                      ; stores the masked %p.09 to @a[%indvars.iv], then adds %step to it
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %conv = and i32 %p.09, 255  ; keeps only the low 8 bits of %p.09 (unsigned narrowing, unlike the shl/ashr pair in @doit1)
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step  ; next value of %p.09; %step is loop-invariant here
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                        ; dedicated exit block that only forwards to for.end
+  br label %for.end
+
+for.end:                                ; reached from the loop exit and from the %n <= 0 bypass
+  ret void
+}
+
+; Here we check that the same phi scev analysis would fail 
+; to create the runtime checks because the step is not invariant.
+; As a result vectorization will fail.
+;
+; int a[N];
+; void doit3(int n, int step) {
+;   int i;
+;   char p = 0;
+;   for (i = 0; i < n; i++) {
+;      a[i] = p;
+;      p = p + step;
+;      step += 2;
+;   }
+; }
+;
+
+; CHECK-LABEL: @doit3
+; CHECK-NOT: vector.scevcheck
+; CHECK-NOT: vector.body:
+; CHECK-LABEL: for.body:
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit3(i32 %n, i32 %step) local_unnamed_addr {
+entry:                                            ; the loop is skipped unless %n > 0 (signed compare)
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  %wide.trip.count = zext i32 %n to i64  ; trip count is %n zero-extended to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %p.012 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %step.addr.010 = phi i32 [ %add3, %for.body ], [ %step, %for.body.preheader ]  ; the step is itself loop-varying (see %add3)
+  %sext = shl i32 %p.012, 24
+  %conv = ashr exact i32 %sext, 24  ; shl + ashr by 24 sign-extends the low 8 bits of %p.012
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step.addr.010  ; next value of %p.012 uses the current, varying step
+  %add3 = add nsw i32 %step.addr.010, 2  ; step grows by 2 every iteration
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; Lastly, we also check the case where we can tell at compile time that
+; the step of the induction is equal to sext(trunc(step)), in which case
+; we don't have to check this equality at runtime (we only need the
+; runtime overflow check). Therefore only the following overflow predicate
+; will be added for runtime checking:
+; {0,+,%cstep}<%for.body> Added Flags: <nssw>
+;
+; a[N];
+; void doit4(int n, char cstep) {
+;   int i;
+;   char p = 0;
+;   int istep = cstep;
+;  for (i = 0; i < n; i++) {
+;      a[i] = p;
+;      p = p + istep;
+;   }
+; }
+
+; CHECK-LABEL: @doit4
+; CHECK: vector.scevcheck
+; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: %{{.*}} = or i1 {{.*}}, %mul.overflow
+; CHECK-NOT: %ident.check = icmp ne i32 {{.*}}, %{{.*}}
+; CHECK-NOT: %{{.*}} = or i1 %{{.*}}, %ident.check
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: vector.body:
+; CHECK: <4 x i32>
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
+entry:
+  %conv = sext i8 %cstep to i32  ; the i32 step is the sign-extension of the i8 argument
+  %cmp10 = icmp sgt i32 %n, 0  ; the loop is skipped unless %n > 0 (signed compare)
+  br i1 %cmp10, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  %wide.trip.count = zext i32 %n to i64  ; trip count is %n zero-extended to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %p.011 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %sext = shl i32 %p.011, 24
+  %conv2 = ashr exact i32 %sext, 24  ; shl + ashr by 24 sign-extends the low 8 bits of %p.011
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv2, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv2, %conv  ; next value of %p.011; the step %conv is loop-invariant
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,66 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; Produced from the test-case:
+;
+; extern void foo(char *, unsigned , unsigned *);
+; extern void bar(int *, long);
+; extern char *processBuf(char *);
+;
+; extern unsigned theSize;
+;
+; void foo(char *buf, unsigned denominator, unsigned *flag) {
+;   int incr = (int) (theSize / denominator);
+;   int inx = 0;
+;   while (*flag) {
+;     int itmp = inx + incr;
+;     int i = (int) theSize;
+;     bar(&i, (long) itmp);
+;     buf = processBuf(buf);
+;     inx = itmp;
+;   }
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@theSize = external local_unnamed_addr global i32, align 4
+
+define void @foo(i8* %buf, i32 %denominator, i32* %flag) local_unnamed_addr {
+entry:
+  %i = alloca i32, align 4
+  %0 = load i32, i32* @theSize, align 4
+  %div = udiv i32 %0, %denominator  ; computed once here and added to %inx.06 on every loop iteration
+  %1 = load i32, i32* %flag, align 4
+  %tobool5 = icmp eq i32 %1, 0  ; the loop is skipped when *flag is 0 on entry
+  br i1 %tobool5, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %entry
+  %2 = bitcast i32* %i to i8*  ; this result has no use in the function
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.lr.ph, %while.body
+; Check that there are two PHIs followed by a 'sext' in the same block, and that
+; the test does not crash.
+; CHECK:        phi
+; CHECK-NEXT:   phi
+; CHECK-NEXT:   sext
+  %buf.addr.07 = phi i8* [ %buf, %while.body.lr.ph ], [ %call, %while.body ]
+  %inx.06 = phi i32 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
+  %add = add nsw i32 %inx.06, %div
+  %3 = load i32, i32* @theSize, align 4  ; @theSize is re-read on every iteration
+  store i32 %3, i32* %i, align 4
+  %conv = sext i32 %add to i64  ; in the input the sext is not adjacent to the two PHIs
+  call void @bar(i32* nonnull %i, i64 %conv)
+  %call = call i8* @processBuf(i8* %buf.addr.07)
+  %4 = load i32, i32* %flag, align 4
+  %tobool = icmp eq i32 %4, 0  ; the loop exits when *flag reads as 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  ret void
+}
+
+declare void @bar(i32*, i64) local_unnamed_addr
+declare i8* @processBuf(i8*) local_unnamed_addr

Added: llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,65 @@
+; RUN: opt -loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+; Produced from test-case:
+;
+; void testGuardedInnerLoop(uint32_t *ptr, uint32_t denom, uint32_t numer, uint32_t outer_lim)
+; {
+;   for(uint32_t outer_i = 0; outer_i < outer_lim; ++outer_i) {
+;     if (denom > 0) {
+;       const uint32_t lim = numer / denom;
+;
+;       for (uint32_t i = 0; i < lim; ++i)
+;         ptr[i] = 1;
+;     }
+;   }
+; }
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @testGuardedInnerLoop(i32* %ptr, i32 %denom, i32 %numer, i32 %outer_lim) {
+entry:
+  %cmp1 = icmp eq i32 %outer_lim, 0  ; nothing to do when the outer limit is 0
+  br i1 %cmp1, label %exit, label %loop1.preheader
+
+; Verify that a 'udiv' does not appear between the 'loop1.preheader' label, and
+; whatever label comes next.
+loop1.preheader:
+; CHECK-LABEL: loop1.preheader:
+; CHECK-NOT: udiv
+; CHECK-LABEL: :
+  br label %loop1
+
+loop1:
+  %outer_i = phi i32 [ %inc1, %loop2.exit ], [ 0, %loop1.preheader ]
+  %0 = add i32 %denom, -1
+  %1 = icmp ult i32 %0, %numer  ; guard for the inner loop: (denom - 1) <u numer
+  br i1 %1, label %loop2.preheader, label %loop2.exit
+
+; Verify that a 'udiv' does appear between the 'loop2.preheader' label, and
+; whatever label comes next.
+loop2.preheader:
+; CHECK-LABEL: loop2.preheader:
+; CHECK: udiv
+; CHECK-LABEL: :
+  %lim = udiv i32 %numer, %denom  ; only reached when the guard in loop1 holds
+  %2 = zext i32 %lim to i64  ; inner-loop bound widened to i64
+  br label %loop2
+
+loop2:
+  %indvar.loop2 = phi i64 [ 0, %loop2.preheader ], [ %indvar.loop2.next, %loop2 ]
+  %arrayidx = getelementptr inbounds i32, i32* %ptr, i64 %indvar.loop2
+  store i32 1, i32* %arrayidx, align 4  ; ptr[i] = 1
+  %indvar.loop2.next = add nuw nsw i64 %indvar.loop2, 1
+  %cmp2 = icmp ult i64 %indvar.loop2.next, %2
+  br i1 %cmp2, label %loop2, label %loop2.exit
+
+loop2.exit:
+  %inc1 = add nuw i32 %outer_i, 1
+  %exitcond = icmp eq i32 %inc1, %outer_lim
+  br i1 %exitcond, label %exit, label %loop1
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr31098.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr31098.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr31098.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr31098.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,100 @@
+; REQUIRES: asserts
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -debug-only=loop-accesses < %s  2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that the compile-time-unknown dependence-distance is resolved
+; statically. Due to the non-unit stride of the accesses in this testcase
+; we are currently not able to create runtime dependence checks, and therefore
+; if we don't resolve the dependence statically we cannot vectorize the loop.
+;
+; Specifically in this example, during dependence analysis we get 6 unknown 
+; dependence distances between the 8 real/imaginary accesses below: 
+;    dist = 8*D, 4+8*D, -4+8*D, -8*D, 4-8*D, -4-8*D.
+; At compile time we can prove for all of the above that |dist|>loopBound*step
+; (where the step is 8bytes, and the loopBound is D-1), and thereby conclude 
+; that there are no dependencies (without runtime tests):
+; |8*D|>8*D-8, |4+8*D|>8*D-8, |-4+8*D|>8*D-8, etc.
+
+; #include <stdlib.h>
+; class Complex {
+; private:
+;   float real_;
+;   float imaginary_;
+;
+; public:
+;   Complex() : real_(0), imaginary_(0) { }
+;   Complex(float real, float imaginary) : real_(real), imaginary_(imaginary) { }
+;   Complex(const Complex &rhs) : real_(rhs.real()), imaginary_(rhs.imaginary()) { }
+; 
+;   inline float real() const { return real_; }
+;   inline float imaginary() const { return imaginary_; }
+; 
+;   Complex operator+(const Complex& rhs) const
+;   {
+;    return Complex(real_ + rhs.real_, imaginary_ + rhs.imaginary_);
+;   }
+;
+;   Complex operator-(const Complex& rhs) const
+;  {
+;     return Complex(real_ - rhs.real_, imaginary_ - rhs.imaginary_);
+;   }
+; };
+;
+; void Test(Complex *out, size_t size)
+; {
+;     size_t D = size / 2;
+;     for (size_t offset = 0; offset < D; ++offset)
+;     {
+;         Complex t0 = out[offset];
+;         Complex t1 = out[offset + D];
+;         out[offset] = t1 + t0;
+;         out[offset + D] = t0 - t1;
+;     }
+; }
+
+; CHECK-LABEL: Test
+; CHECK: LAA: No unsafe dependent memory operations in loop.  We don't need runtime memory checks.
+; CHECK: vector.body:
+; CHECK: <4 x i32>
+
+%class.Complex = type { float, float }
+
+define void @Test(%class.Complex* nocapture %out, i64 %size) local_unnamed_addr {
+entry:
+  %div = lshr i64 %size, 1  ; D = size / 2 (unsigned)
+  %cmp47 = icmp eq i64 %div, 0  ; no iterations when D is 0
+  br i1 %cmp47, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %offset.048 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %0 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.048, i32 0
+  %1 = load float, float* %0, align 4  ; field 0 of out[offset]
+  %imaginary_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.048, i32 1
+  %2 = load float, float* %imaginary_.i.i, align 4  ; field 1 of out[offset]
+  %add = add nuw i64 %offset.048, %div  ; offset + D
+  %3 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add, i32 0
+  %4 = load float, float* %3, align 4  ; field 0 of out[offset + D]
+  %imaginary_.i.i28 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add, i32 1
+  %5 = load float, float* %imaginary_.i.i28, align 4  ; field 1 of out[offset + D]
+  %add.i = fadd fast float %4, %1
+  %add4.i = fadd fast float %5, %2
+  store float %add.i, float* %0, align 4  ; out[offset] receives the field-wise sums
+  store float %add4.i, float* %imaginary_.i.i, align 4
+  %sub.i = fsub fast float %1, %4
+  %sub4.i = fsub fast float %2, %5
+  store float %sub.i, float* %3, align 4  ; out[offset + D] receives the field-wise differences
+  store float %sub4.i, float* %imaginary_.i.i28, align 4
+  %inc = add nuw nsw i64 %offset.048, 1
+  %exitcond = icmp eq i64 %inc, %div  ; loop runs for offset = 0 .. D-1
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr31190.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr31190.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr31190.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr31190.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,63 @@
+; RUN: opt -passes='loop-vectorize' -debug -S < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; This checks we don't crash when the inner loop we're trying to vectorize
+; is a SCEV AddRec with respect to an outer loop.
+
+; In this case, the problematic PHI is:
+; %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ]
+; Since %inc54 is the IV of the outer loop, and %0 is equivalent to it,
+; we get the situation described above.
+
+; Code that leads to this situation can look something like:
+;
+; int a, b[1], c;
+; void fn1 ()
+; {
+;  for (; c; c++)
+;    for (a = 0; a; a++)
+;      b[c] = 4;
+; }
+;
+; The PHI is an artifact of the register promotion of c.
+
+; Note that we can no longer get the vectorizer to actually see such PHIs,
+; because LV now simplifies the loop internally, but the test is still
+; useful as a regression test, and in case loop-simplify behavior changes.
+
+@c = external global i32, align 4
+@a = external global i32, align 4
+@b = external global [1 x i32], align 4
+
+; We can vectorize this loop because we are storing an invariant value into an
+; invariant address.
+
+; CHECK: LV: We can vectorize this loop!
+; CHECK-LABEL: @test
+define void @test() {
+entry:                                            ; initial values of the two counters are loaded from @a and @c
+  %a.promoted2 = load i32, i32* @a, align 1
+  %c.promoted = load i32, i32* @c, align 1
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.for.inc4_crit_edge, %entry
+  %inc54 = phi i32 [ %inc5, %for.cond1.for.inc4_crit_edge ], [ %c.promoted, %entry ]
+  %inc.lcssa3 = phi i32 [ %inc.lcssa, %for.cond1.for.inc4_crit_edge ], [ %a.promoted2, %entry ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
+  %inc1 = phi i32 [ %inc.lcssa3, %for.cond1.preheader ], [ %inc, %for.body3 ]
+  %0 = phi i32 [ undef, %for.cond1.preheader ], [ %inc54, %for.body3 ]  ; undef on loop entry, the outer-loop value %inc54 on the back edge
+  %idxprom = sext i32 %0 to i64
+  %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @b, i64 0, i64 %idxprom
+  store i32 4, i32* %arrayidx, align 4  ; the stored value is the constant 4
+  %inc = add nsw i32 %inc1, 1
+  %tobool2 = icmp eq i32 %inc, 0  ; inner loop exits when the incremented counter is 0
+  br i1 %tobool2, label %for.cond1.for.inc4_crit_edge, label %for.body3
+
+for.cond1.for.inc4_crit_edge:                     ; preds = %for.body3
+  %inc.lcssa = phi i32 [ %inc, %for.body3 ]
+  %.lcssa = phi i32 [ %inc54, %for.body3 ]
+  %inc5 = add nsw i32 %.lcssa, 1
+  br label %for.cond1.preheader  ; unconditional back edge: the function contains no ret
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr32859.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr32859.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr32859.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr32859.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+; Out of the LCSSA form we could have 'phi i32 [ loop-invariant, %for.inc.2.i ]'
+; but the IR Verifier requires a PHI to have one entry for each predecessor of
+; its parent basic block. The original PR14725 solution for the issue just
+; added 'undef' for a predecessor BB, which is not correct. We copy the real
+; value for another predecessor instead of bringing in 'undef'.
+
+; CHECK-LABEL: for.cond.preheader:
+; CHECK: %e.0.ph = phi i32 [ 0, %if.end.2.i ], [ 0, %middle.block ]
+
+; Function Attrs: nounwind uwtable
+define void @main() #0 {
+entry:
+  br label %for.cond1.preheader.i
+
+for.cond1.preheader.i:                            ; preds = %if.end.2.i, %entry
+  %c.06.i = phi i32 [ 0, %entry ], [ %inc5.i, %if.end.2.i ]
+  %tobool.i = icmp ne i32 undef, 0  ; this result has no use in the function
+  br label %if.end.2.i
+
+if.end.2.i:                                       ; preds = %for.cond1.preheader.i
+  %inc5.i = add nsw i32 %c.06.i, 1
+  %cmp.i = icmp slt i32 %inc5.i, 16  ; loop continues while the incremented counter is below 16
+  br i1 %cmp.i, label %for.cond1.preheader.i, label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %if.end.2.i
+  %e.0.ph = phi i32 [ 0, %if.end.2.i ]  ; single-entry PHI with a constant incoming value
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr33706.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr33706.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr33706.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr33706.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s | FileCheck %s
+
+@global = local_unnamed_addr global i32 0, align 4
+@global.1 = local_unnamed_addr global i32 0, align 4
+@global.2 = local_unnamed_addr global float 0x3EF0000000000000, align 4
+
+; CHECK-LABEL: @PR33706
+; CHECK-NOT: <2 x i32>
+define void @PR33706(float* nocapture readonly %arg, float* nocapture %arg1, i32 %arg2) local_unnamed_addr {
+bb:
+  %tmp = load i32, i32* @global.1, align 4
+  %tmp3 = getelementptr inbounds float, float* %arg, i64 190
+  %tmp4 = getelementptr inbounds float, float* %arg1, i64 512
+  %tmp5 = and i32 %tmp, 65535  ; low 16 bits of the value loaded from @global.1
+  %tmp6 = icmp ugt i32 %arg2, 65536  ; the loop in bb11 is bypassed when %arg2 > 65536 (unsigned)
+  br i1 %tmp6, label %bb7, label %bb9
+
+bb7:                                              ; preds = %bb
+  %tmp8 = load i32, i32* @global, align 4
+  br label %bb27
+
+bb9:                                              ; preds = %bb
+  %tmp10 = udiv i32 65536, %arg2  ; initial value of the countdown %tmp14
+  br label %bb11
+
+bb11:                                             ; preds = %bb11, %bb9
+  %tmp12 = phi i32 [ %tmp20, %bb11 ], [ %tmp5, %bb9 ]
+  %tmp13 = phi float* [ %tmp18, %bb11 ], [ %tmp4, %bb9 ]
+  %tmp14 = phi i32 [ %tmp16, %bb11 ], [ %tmp10, %bb9 ]
+  %tmp15 = phi i32 [ %tmp19, %bb11 ], [ %tmp, %bb9 ]
+  %tmp16 = add nsw i32 %tmp14, -1  ; countdown; the loop exits when it reaches 0
+  %tmp17 = sitofp i32 %tmp12 to float
+  store float %tmp17, float* %tmp13, align 4
+  %tmp18 = getelementptr inbounds float, float* %tmp13, i64 1  ; store pointer advances by one float per iteration
+  %tmp19 = add i32 %tmp15, %arg2  ; running sum advanced by %arg2 each iteration
+  %tmp20 = and i32 %tmp19, 65535  ; %tmp12 for the next iteration is the low 16 bits of that sum
+  %tmp21 = icmp eq i32 %tmp16, 0
+  br i1 %tmp21, label %bb22, label %bb11
+
+bb22:                                             ; preds = %bb11
+  %tmp23 = phi float* [ %tmp18, %bb11 ]
+  %tmp24 = phi i32 [ %tmp19, %bb11 ]
+  %tmp25 = phi i32 [ %tmp20, %bb11 ]
+  %tmp26 = ashr i32 %tmp24, 16  ; high part of the final running sum, written back to @global
+  store i32 %tmp26, i32* @global, align 4
+  br label %bb27
+
+bb27:                                             ; preds = %bb22, %bb7
+  %tmp28 = phi i32 [ %tmp26, %bb22 ], [ %tmp8, %bb7 ]
+  %tmp29 = phi float* [ %tmp23, %bb22 ], [ %tmp4, %bb7 ]
+  %tmp30 = phi i32 [ %tmp25, %bb22 ], [ %tmp5, %bb7 ]
+  %tmp31 = sext i32 %tmp28 to i64
+  %tmp32 = getelementptr inbounds float, float* %tmp3, i64 %tmp31
+  %tmp33 = load float, float* %tmp32, align 4
+  %tmp34 = sitofp i32 %tmp30 to float
+  %tmp35 = load float, float* @global.2, align 4
+  %tmp36 = fmul float %tmp35, %tmp34
+  %tmp37 = fadd float %tmp33, %tmp36
+  store float %tmp37, float* %tmp29, align 4
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr34681.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr34681.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr34681.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr34681.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,122 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check the scenario where we have an unknown Stride, which happens to also be
+; the loop iteration count, so if we specialize the loop for the Stride==1 case,
+; this also implies that the loop will iterate no more than a single iteration,
+; as in the following example: 
+;
+;       unsigned int N;
+;       int tmp = 0;
+;       for(unsigned int k=0;k<N;k++) {
+;         tmp+=(int)B[k*N+j];
+;       }
+;
+; We check here that the following runtime scev guard for Stride==1 is NOT generated:
+; vector.scevcheck:
+;   %ident.check = icmp ne i32 %N, 1
+;   %0 = or i1 false, %ident.check
+;   br i1 %0, label %scalar.ph, label %vector.ph
+; Instead the loop is vectorized with an unknown stride.
+
+; CHECK-LABEL: @foo1
+; CHECK: for.body.lr.ph
+; CHECK-NOT: %ident.check = icmp ne i32 %N, 1
+; CHECK-NOT: %[[TEST:[0-9]+]] = or i1 false, %ident.check
+; CHECK-NOT: br i1 %[[TEST]], label %scalar.ph, label %vector.ph
+; CHECK: vector.ph
+; CHECK: vector.body
+; CHECK: <4 x i32>
+; CHECK: middle.block
+; CHECK: scalar.ph
+
+
+define i32 @foo1(i32 %N, i16* nocapture readnone %A, i16* nocapture readonly %B, i32 %i, i32 %j)  {
+entry:
+  %cmp8 = icmp eq i32 %N, 0
+  br i1 %cmp8, label %for.end, label %for.body.lr.ph  ; N == 0: skip the loop and return 0
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %tmp.010 = phi i32 [ 0, %for.body.lr.ph ], [ %add1, %for.body ]  ; running sum
+  %k.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]  ; loop counter k
+  %mul = mul i32 %k.09, %N  ; k*N: %N acts as the access stride ...
+  %add = add i32 %mul, %j
+  %arrayidx = getelementptr inbounds i16, i16* %B, i32 %add
+  %0 = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %0 to i32
+  %add1 = add nsw i32 %tmp.010, %conv
+  %inc = add nuw i32 %k.09, 1
+  %exitcond = icmp eq i32 %inc, %N  ; ... and %N is also the trip count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  %add1.lcssa = phi i32 [ %add1, %for.body ]
+  br label %for.end
+
+for.end: 
+  %tmp.0.lcssa = phi i32 [ 0, %entry ], [ %add1.lcssa, %for.end.loopexit ]  ; 0 when the loop was skipped
+  ret i32 %tmp.0.lcssa
+}
+
+
+; Check the same, but also where the Stride and the loop iteration count
+; are not of the same data type. 
+;
+;       unsigned short N;
+;       int tmp = 0;
+;       for(unsigned int k=0;k<N;k++) {
+;         tmp+=(int)B[k*N+j];
+;       }
+;
+; We check here that the following runtime scev guard for Stride==1 is NOT generated:
+; vector.scevcheck:
+; %ident.check = icmp ne i16 %N, 1
+; %0 = or i1 false, %ident.check
+; br i1 %0, label %scalar.ph, label %vector.ph
+
+
+; CHECK-LABEL: @foo2
+; CHECK: for.body.lr.ph
+; CHECK-NOT: %ident.check = icmp ne i16 %N, 1
+; CHECK-NOT: %[[TEST:[0-9]+]] = or i1 false, %ident.check
+; CHECK-NOT: br i1 %[[TEST]], label %scalar.ph, label %vector.ph
+; CHECK: vector.ph
+; CHECK: vector.body
+; CHECK: <4 x i32>
+; CHECK: middle.block
+; CHECK: scalar.ph
+
+define i32 @foo2(i16 zeroext %N, i16* nocapture readnone %A, i16* nocapture readonly %B, i32 %i, i32 %j) {
+entry:
+  %conv = zext i16 %N to i32  ; i16 N widened once; used as both stride and bound below
+  %cmp11 = icmp eq i16 %N, 0
+  br i1 %cmp11, label %for.end, label %for.body.lr.ph  ; N == 0: skip the loop and return 0
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %tmp.013 = phi i32 [ 0, %for.body.lr.ph ], [ %add4, %for.body ]  ; running sum
+  %k.012 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]  ; loop counter k
+  %mul = mul nuw i32 %k.012, %conv  ; k * zext(N): stride is the widened N
+  %add = add i32 %mul, %j
+  %arrayidx = getelementptr inbounds i16, i16* %B, i32 %add
+  %0 = load i16, i16* %arrayidx, align 2
+  %conv3 = sext i16 %0 to i32
+  %add4 = add nsw i32 %tmp.013, %conv3
+  %inc = add nuw nsw i32 %k.012, 1
+  %exitcond = icmp eq i32 %inc, %conv  ; trip count is the same widened N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  %add4.lcssa = phi i32 [ %add4, %for.body ]
+  br label %for.end
+
+for.end:
+  %tmp.0.lcssa = phi i32 [ 0, %entry ], [ %add4.lcssa, %for.end.loopexit ]  ; 0 when the loop was skipped
+  ret i32 %tmp.0.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr35743.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr35743.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr35743.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr35743.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,102 @@
+; RUN: opt < %s  -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This cannot be correctly vectorized with type i1.
+define i8 @test_01(i8 %c) #0 {
+
+; CHECK-LABEL: @test_01(
+; CHECK-NOT:   vector.body:
+; CHECK-NOT:   zext i1 {{.*}} to i8
+
+entry:
+  br label %loop
+
+exit:                                           ; preds = %loop
+  ret i8 %accum.plus  ; the whole i8 value escapes the loop, not just bit 0
+
+loop:                                            ; preds = %loop, %entry
+  %accum.phi = phi i8 [ %c, %entry ], [ %accum.plus, %loop ]
+  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+  %accum.and = and i8 %accum.phi, 1  ; only bit 0 of the previous value is consumed
+  %accum.plus = add nuw nsw i8 %accum.and, 3
+  %iv.next = add nuw nsw i32 %iv, 1
+  %cond = icmp ugt i32 %iv, 191  ; exit once iv (starting at 1) exceeds 191
+  br i1 %cond, label %exit, label %loop
+}
+
+; TODO: This can be vectorized with type i1 because the result is not used.
+define void @test_02(i8 %c) #0 {
+
+; CHECK-LABEL: @test_02(
+; CHECK-NOT:   vector.body:
+
+entry:
+  br label %loop
+
+exit:                                           ; preds = %loop
+  %lcssa = phi i8 [ %accum.plus, %loop ]  ; has no users: the loop result is dead
+  ret void
+
+loop:                                            ; preds = %loop, %entry
+  %accum.phi = phi i8 [ %c, %entry ], [ %accum.plus, %loop ]
+  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+  %accum.and = and i8 %accum.phi, 1  ; only bit 0 of the previous value is consumed
+  %accum.plus = add nuw nsw i8 %accum.and, 3
+  %iv.next = add nuw nsw i32 %iv, 1
+  %cond = icmp ugt i32 %iv, 191  ; exit once iv (starting at 1) exceeds 191
+  br i1 %cond, label %exit, label %loop
+}
+
+; This can be vectorized with type i1 because the result is truncated properly.
+define i1 @test_03(i8 %c) #0 {
+
+; CHECK-LABEL: @test_03(
+; CHECK:   vector.body:
+; CHECK:   zext i1 {{.*}} to i8
+
+entry:
+  br label %loop
+
+exit:                                           ; preds = %loop
+  %lcssa = phi i8 [ %accum.plus, %loop ]
+  %trunc = trunc i8 %lcssa to i1  ; only bit 0 of the loop result is observable
+  ret i1 %trunc
+
+loop:                                            ; preds = %loop, %entry
+  %accum.phi = phi i8 [ %c, %entry ], [ %accum.plus, %loop ]
+  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+  %accum.and = and i8 %accum.phi, 1  ; only bit 0 of the previous value is consumed
+  %accum.plus = add nuw nsw i8 %accum.and, 3
+  %iv.next = add nuw nsw i32 %iv, 1
+  %cond = icmp ugt i32 %iv, 191  ; exit once iv (starting at 1) exceeds 191
+  br i1 %cond, label %exit, label %loop
+}
+
+; This cannot be vectorized with type i1 because the result is truncated to a
+; wrong type.
+; TODO: It can also be vectorized with type i32 (or maybe i4?)
+define i4 @test_04(i8 %c) #0 {
+
+; CHECK-LABEL: @test_04(
+; CHECK-NOT:   vector.body:
+; CHECK-NOT:   zext i1 {{.*}} to i8
+
+entry:
+  br label %loop
+
+exit:                                           ; preds = %loop
+  %lcssa = phi i8 [ %accum.plus, %loop ]
+  %trunc = trunc i8 %lcssa to i4  ; the low 4 bits escape, so i1 is too narrow here
+  ret i4 %trunc
+
+loop:                                            ; preds = %loop, %entry
+  %accum.phi = phi i8 [ %c, %entry ], [ %accum.plus, %loop ]
+  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+  %accum.and = and i8 %accum.phi, 1  ; only bit 0 of the previous value is consumed
+  %accum.plus = add nuw nsw i8 %accum.and, 3
+  %iv.next = add nuw nsw i32 %iv, 1
+  %cond = icmp ugt i32 %iv, 191  ; exit once iv (starting at 1) exceeds 191
+  br i1 %cond, label %exit, label %loop
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr35773.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr35773.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr35773.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr35773.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+@a = common local_unnamed_addr global i32 0, align 4  ; not referenced by @doit1 in this file
+@b = common local_unnamed_addr global i8 0, align 1  ; receives the i8 accumulator after the loop in @doit1
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit1() local_unnamed_addr{
+entry:
+  br label %for.body
+
+for.body:
+  %main.iv = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; primary counter, 0..15
+
+  %i8.iv = phi i8 [ 0, %entry ], [ %i8.add, %for.body ]  ; i8 accumulator, the only value live after the loop
+  %i32.iv = phi i32 [ 0, %entry ], [ %i32.add, %for.body ]  ; advances by 9 after masking to 8 bits
+
+  %trunc.to.be.converted.to.new.iv = trunc i32 %i32.iv to i8
+  %i8.add = add i8 %i8.iv, %trunc.to.be.converted.to.new.iv
+
+  %noop.conv.under.pse = and i32 %i32.iv, 255
+  %i32.add = add nuw nsw i32 %noop.conv.under.pse, 9
+
+  %inc = add i32 %main.iv, 1
+  %tobool = icmp eq i32 %inc, 16  ; constant trip count of 16
+  br i1 %tobool, label %for.cond.for.end_crit_edge, label %for.body
+
+; CHECK-LABEL: @doit1(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[MAIN_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[MAIN_IV_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[I8_IV:%.*]] = phi <4 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[I8_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[I32_IV:%.*]] = phi <4 x i32> [ <i32 0, i32 9, i32 18, i32 27>, [[VECTOR_PH]] ], [ [[I32_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[IV_FROM_TRUNC:%.*]] = phi <4 x i8> [ <i8 0, i8 9, i8 18, i8 27>, [[VECTOR_PH]] ], [ [[IV_FROM_TRUNC_NEXT:%.*]], [[VECTOR_BODY]] ]
+
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[MAIN_IV]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[MAIN_IV]], 0
+
+; CHECK-NEXT:    [[I8_IV_NEXT]] = add <4 x i8> [[I8_IV]], [[IV_FROM_TRUNC]]
+
+; CHECK-NEXT:    [[MAIN_IV_NEXT]] = add i32 [[MAIN_IV]], 4
+; CHECK-NEXT:    [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], <i32 36, i32 36, i32 36, i32 36>
+; CHECK-NEXT:    [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], <i8 36, i8 36, i8 36, i8 36>
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+
+for.cond.for.end_crit_edge:
+  store i8 %i8.add, i8* @b, align 1  ; publish the final i8 accumulator
+  br label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr36311.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr36311.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr36311.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr36311.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -S < %s
+;
+; Cleaned up version of fe_tools.all_dimensions.ll from PR36311.
+; Forcing VF=2 to trigger vector code gen
+;
+; This test case lets the vectorizer's code gen modify the CFG and leave the
+; DomTree out of date, such that an assert from SCEV would trigger if
+; reanalysis of SCEV happens subsequently. Once vector code gen starts, the
+; vectorizer should not invoke recomputation of any analysis.
+
+$test = comdat any  ; comdat used by the definition of @test
+
+declare i32 @__gxx_personality_v0(...)  ; named as the personality routine of @test
+
+; Function Attrs: uwtable
+define dso_local void @test() local_unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  br label %for.body51
+
+for.body51:                                       ; preds = %for.body51, %entry
+  br i1 undef, label %for.body51, label %for.body89.lr.ph  ; empty self-loop with an undef exit condition
+
+for.cond80.loopexit:                              ; preds = %for.body89
+  %inc94.lcssa = phi i32 [ %inc94, %for.body89 ]
+  br i1 undef, label %for.body89.lr.ph, label %nrvo.skipdtor.loopexit  ; outer-loop latch (undef condition)
+
+for.body89.lr.ph:                                 ; preds = %for.cond80.loopexit, %for.body51
+  %i79.0179 = phi i32 [ %add90, %for.cond80.loopexit ], [ 0, %for.body51 ]  ; outer counter
+  %next_index.4178 = phi i32 [ %inc94.lcssa, %for.cond80.loopexit ], [ undef, %for.body51 ]  ; store index carried across outer iterations
+  %add90 = add nuw i32 %i79.0179, 1
+  %mul91 = mul i32 %add90, undef
+  br label %for.body89
+
+for.body89:                                       ; preds = %for.body89, %for.body89.lr.ph
+  %j.0175 = phi i32 [ 0, %for.body89.lr.ph ], [ %add92, %for.body89 ]  ; inner counter
+  %next_index.5174 = phi i32 [ %next_index.4178, %for.body89.lr.ph ], [ %inc94, %for.body89 ]  ; store index, +1 per inner iteration
+  %add92 = add nuw i32 %j.0175, 1
+  %add93 = add i32 %add92, %mul91
+  %inc94 = add i32 %next_index.5174, 1
+  %conv95 = zext i32 %next_index.5174 to i64
+  %arrayidx.i160 = getelementptr inbounds i32, i32* undef, i64 %conv95
+  store i32 %add93, i32* %arrayidx.i160, align 4
+;, !tbaa !1
+  %cmp87 = icmp ult i32 %add92, undef  ; inner bound is undef in this reduced test
+  br i1 %cmp87, label %for.body89, label %for.cond80.loopexit
+
+nrvo.skipdtor.loopexit:                           ; preds = %for.cond80.loopexit
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr36983.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr36983.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr36983.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr36983.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+; There can be more than one LCSSA PHI in the loop exit block.
+
+; CHECK-LABEL: bb1.bb3_crit_edge:
+; CHECK: %_tmp133.lcssa1 = phi i16 [ %scalar.recur, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ]
+; CHECK: %_tmp133.lcssa = phi i16 [ %scalar.recur, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ]
+
+define void @f1() {
+bb2.lr.ph:
+  br label %bb2
+
+bb2:                                              ; preds = %bb2, %bb2.lr.ph
+  %_tmp132 = phi i16 [ 0, %bb2.lr.ph ], [ %_tmp10, %bb2 ]  ; down-counter, starts at 0
+  %_tmp133 = phi i16 [ undef, %bb2.lr.ph ], [ %_tmp10, %bb2 ]  ; same back-edge value, undef on entry; only the exit phis use it
+  %_tmp10 = sub nsw i16 %_tmp132, 1
+  %_tmp15 = icmp ne i16 %_tmp10, 0
+  br i1 %_tmp15, label %bb2, label %bb1.bb3_crit_edge
+
+bb1.bb3_crit_edge:                                ; preds = %bb2
+  %_tmp133.lcssa1 = phi i16 [ %_tmp133, %bb2 ]  ; two exit phis of the same loop value
+  %_tmp133.lcssa = phi i16 [ %_tmp133, %bb2 ]
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr37248.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr37248.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr37248.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr37248.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt -passes='loop-vectorize' -force-vector-width=2 -S < %s | FileCheck %s
+;
+; Forcing VF=2 to trigger vector code gen
+;
+; This test case makes the vectorizer's code gen generate more than
+; one BasicBlock in the loop body (emulated masked scatter)
+; for those targets that do not support masked scatter. Broadcast
+; code generation was previously dependent on the loop body being
+; a single basic block, and this test case exposed incorrect code gen
+; resulting in an assert in IL verification. The test passes if IL
+; verification does not fail.
+;
+; Performing minimal check in the output to ensure the loop is actually
+; vectorized.
+;
+; CHECK: vector.body
+
+@a = external global [2 x i16], align 1  ; indexed by the truncated loop counter in @f1
+
+define void @f1() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %land.end, %entry
+  %0 = phi i32 [ undef, %entry ], [ %dec, %land.end ]  ; down-counter with an undef start value
+  br i1 undef, label %land.end, label %land.rhs  ; conditional block inside the loop body
+
+land.rhs:                                         ; preds = %for.body
+  %1 = load i32, i32* undef, align 1  ; result is unused
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %for.body
+  %2 = trunc i32 %0 to i16
+  %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* @a, i16 0, i16 %2
+  store i16 undef, i16* %arrayidx, align 1  ; store address follows the decreasing counter
+  %dec = add nsw i32 %0, -1
+  %cmp = icmp sgt i32 %0, 1  ; keep looping while the counter is > 1
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %land.end
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr37515.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr37515.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr37515.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr37515.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt -passes='loop-vectorize' -S -pass-remarks-missed=loop-vectorize < %s 2>&1 | FileCheck %s
+;
+; FP primary induction is not supported in LV. Make sure Legal bails out.
+;
+; CHECK: loop not vectorized
+
+define void @PR37515() {
+entry:
+  br label %loop
+
+loop:
+  %p = phi float [ 19.0, %entry ], [ %a, %loop ]  ; the only loop-carried value is a float
+  %a = fadd fast float %p, -1.0
+  %m = fmul fast float %a, %a  ; result is unused
+  %c = fcmp fast ugt float %a, 2.0  ; exit test is on the float value; there is no integer counter
+  br i1 %c, label %loop, label %exit
+
+exit:
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr38800.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr38800.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr38800.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr38800.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -pass-remarks-missed='loop-vectorize' -S < %s 2>&1 | FileCheck %s
+
+; CHECK: remark: <unknown>:0:0: loop not vectorized: integer loop induction variable could not be identified
+
+; Test-case ('-O2 -ffast-math') from PR38800.
+; (Set '-force-vector-width=2' to enable vector code generation.)
+;
+; The absence of an integral induction variable in the source code caused a
+; compiler crash when attempting to vectorize.  With the fix, a remark
+; indicating why the loop wasn't vectorized is produced instead.
+;
+;void foo(float *ptr, float val) {
+;  float f;
+;  for (f = 0.1f; f < 1.0f; f += 0.01f)
+;    *ptr += val;
+;}
+
+define void @foo(float* nocapture %ptr, float %val) local_unnamed_addr {
+entry:
+  %ptr.promoted = load float, float* %ptr, align 4  ; *ptr is loaded once before the loop
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %add5 = phi float [ %ptr.promoted, %entry ], [ %add, %for.body ]  ; running value of *ptr
+  %f.04 = phi float [ 0x3FB99999A0000000, %entry ], [ %add1, %for.body ]  ; float loop variable f (0.1f in the C source above)
+  %add = fadd fast float %add5, %val
+  %add1 = fadd fast float %f.04, 0x3F847AE140000000  ; f step (0.01f in the C source above)
+  %cmp = fcmp fast olt float %add1, 1.000000e+00  ; loop is controlled only by the float compare
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  store float %add, float* %ptr, align 4  ; single store of the accumulated value
+  ret void
+}

Propchange: llvm/trunk/test/Transforms/LoopVectorize/pr38800.ll
------------------------------------------------------------------------------
    svn:executable = *

Added: llvm/trunk/test/Transforms/LoopVectorize/pr39099.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr39099.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr39099.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr39099.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; REQUIRES: asserts
+; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+; Ensure that we don't create interleave groups for predicated
+; strided accesses. 
+
+; CHECK: LV: Checking a loop in "masked_strided"
+; CHECK: LV: Analyzing interleaved accesses...
+; CHECK-NOT: LV: Creating an interleave group
+
+define dso_local void @masked_strided(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr {
+entry:
+  %conv = zext i8 %guard to i32
+  br label %for.body
+
+for.body:
+  %ix.017 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp1 = icmp ugt i32 %ix.017, %conv  ; predicate: only iterations with ix > guard touch memory
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:
+  %mul = shl nuw nsw i32 %ix.017, 1  ; 2*ix: stride-2 index
+  %arrayidx = getelementptr inbounds i8, i8* %p, i32 %mul
+  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx4 = getelementptr inbounds i8, i8* %q, i32 %mul
+  store i8 %0, i8* %arrayidx4, align 1  ; q[2*ix] = p[2*ix]
+  %sub = sub i8 0, %0
+  %add = or i32 %mul, 1  ; 2*ix is even, so this is 2*ix + 1
+  %arrayidx8 = getelementptr inbounds i8, i8* %q, i32 %add
+  store i8 %sub, i8* %arrayidx8, align 1  ; q[2*ix+1] = -p[2*ix]
+  br label %for.inc
+
+for.inc:
+  %inc = add nuw nsw i32 %ix.017, 1
+  %exitcond = icmp eq i32 %inc, 1024  ; constant trip count of 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,54 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; PR39417
+; Check that the need for overflow check prevents vectorizing a loop with tiny
+; trip count (which implies opt for size).
+; CHECK-LABEL: @func_34
+; CHECK-NOT: vector.scevcheck
+; CHECK-NOT: vector.body:
+; CHECK-LABEL: bb67:
+define void @func_34() {
+bb1:
+  br label %bb67
+
+bb67:
+  %storemerge2 = phi i32 [ 0, %bb1 ], [ %_tmp2300, %bb67 ]
+  %sext = shl i32 %storemerge2, 16  ; shl 16 then ashr 16: sign-extend the low 16 bits
+  %_tmp2299 = ashr exact i32 %sext, 16
+  %_tmp2300 = add nsw i32 %_tmp2299, 1
+  %_tmp2310 = trunc i32 %_tmp2300 to i16
+  %_tmp2312 = icmp slt i16 %_tmp2310, 3  ; loop while the i16 view of the counter is < 3
+  br i1 %_tmp2312, label %bb67, label %bb68
+
+bb68:
+  ret void
+}
+
+; Check that the need for stride==1 check prevents vectorizing a loop under opt
+; for size.
+; CHECK-LABEL: @scev4stride1
+; CHECK-NOT: vector.scevcheck
+; CHECK-NOT: vector.body:
+; CHECK-LABEL: for.body:
+define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %k) #0 {
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %mul = mul nsw i32 %i.07, %k  ; i*k: the load stride is the runtime value %k
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %mul
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 %i.07
+  store i32 %0, i32* %arrayidx1, align 4  ; a[i] = b[i*k]
+  %inc = add nuw nsw i32 %i.07, 1
+  %exitcond = icmp eq i32 %inc, 1024  ; constant trip count of 1024
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  ret void
+}
+
+attributes #0 = { optsize }  ; attached to @scev4stride1; @func_34 carries no attribute group

Added: llvm/trunk/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s
+; RUN: opt < %s -debugify -loop-vectorize -S | FileCheck %s -check-prefix DEBUGLOC
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; This test makes sure we don't duplicate the loop vectorizer's metadata
+; while marking them as already vectorized (by setting width = 1), even
+; at lower optimization levels, where no extra cleanup is done
+
+; DEBUGLOC-LABEL: define void @_Z3fooPf(
+; Check that the phi to resume the scalar part of the loop
+; has Debug Location.
+define void @_Z3fooPf(float* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  %p = load float, float* %arrayidx, align 4
+  %mul = fmul float %p, 2.000000e+00
+  store float %mul, float* %arrayidx, align 4  ; a[i] *= 2.0 in place
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024  ; constant trip count of 1024
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0  ; loop ID !0 carries the vectorize.width hint (!1)
+
+; DEBUGLOC: scalar.ph:
+; DEBUGLOC-NEXT:    %bc.resume.val = phi {{.*}} !dbg ![[DbgLoc:[0-9]+]]
+;
+; DEBUGLOC: ![[DbgLoc]] = !DILocation(line: 2
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = !{!0, !1}  ; self-referential loop ID attached to the latch branch of for.body
+!1 = !{!"llvm.loop.vectorize.width", i32 4}  ; input hint; must not survive in the output
+; CHECK-NOT: !{!"llvm.loop.vectorize.width", i32 4}
+; CHECK: !{!"llvm.loop.isvectorized", i32 1}

Added: llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; This testcase causes SCEV to return a pointer-typed exit value.
+
+; CHECK: @f
+; Expect that the pointer indvar has been converted into an integer indvar.
+; CHECK: %index.next = add i64 %index, 4
+define i32 @f(i32* readonly %a, i32* readnone %b) #0 {
+entry:
+  %cmp.6 = icmp ult i32* %a, %b
+  br i1 %cmp.6, label %while.body.preheader, label %while.end  ; loop runs only when a < b
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %a.pn = phi i32* [ %incdec.ptr8, %while.body ], [ %a, %while.body.preheader ]  ; pointer-typed induction variable
+  %acc.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]  ; running sum
+  %incdec.ptr8 = getelementptr inbounds i32, i32* %a.pn, i64 1
+  %0 = load i32, i32* %incdec.ptr8, align 1  ; loads through the already-incremented pointer
+  %add = add nuw nsw i32 %0, %acc.07
+  %exitcond = icmp eq i32* %incdec.ptr8, %b  ; exit when the pointer reaches %b
+  br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body
+
+while.cond.while.end_crit_edge:                   ; preds = %while.body
+  %add.lcssa = phi i32 [ %add, %while.body ]
+  br label %while.end
+
+while.end:                                        ; preds = %while.cond.while.end_crit_edge, %entry
+  %acc.0.lcssa = phi i32 [ %add.lcssa, %while.cond.while.end_crit_edge ], [ 0, %entry ]  ; 0 when the loop was skipped
+  ret i32 %acc.0.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/ptr_loops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/ptr_loops.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/ptr_loops.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/ptr_loops.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,73 @@
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@A = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16  ; 36 ints, A[i] == i; store target of the loops below
+@B = global [36 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35], align 16  ; 36 ints, B[i] == i; load source of the loops below
+
+;CHECK-LABEL:@_Z5test1v(
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test1v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @A, i64 0, i64 18), %0 ], [ %4, %1 ]  ; store pointer, walks down from A[18]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @B, i64 0, i64 0), %0 ], [ %5, %1 ]  ; load pointer, walks up from B[0]
+  %2 = load i32, i32* %b.01, align 4
+  %3 = shl nsw i32 %2, 1
+  store i32 %3, i32* %p.02, align 4  ; *p = 2 * *b
+  %4 = getelementptr inbounds i32, i32* %p.02, i64 -1
+  %5 = getelementptr inbounds i32, i32* %b.01, i64 1
+  %6 = icmp eq i32* %4, getelementptr ([36 x i32], [36 x i32]* @A, i64 128102389400760775, i64 3)  ; with 64-bit pointers 128102389400760775*144 + 12 wraps to -4, i.e. the i32 slot just before A[0]
+  br i1 %6, label %7, label %1
+
+; <label>:7                                       ; preds = %1
+  ret i32 0
+}
+
+;CHECK-LABEL: @_Z5test2v(
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test2v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @A, i64 0, i64 25), %0 ], [ %3, %1 ]  ; store pointer, walks down from A[25]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @B, i64 0, i64 2), %0 ], [ %4, %1 ]  ; load pointer, walks up from B[2]
+  %2 = load i32, i32* %b.01, align 4
+  store i32 %2, i32* %p.02, align 4  ; *p = *b
+  %3 = getelementptr inbounds i32, i32* %p.02, i64 -1
+  %4 = getelementptr inbounds i32, i32* %b.01, i64 1
+  %5 = icmp eq i32* %4, getelementptr inbounds ([36 x i32], [36 x i32]* @A, i64 0, i64 18)  ; exit compares the advancing @B pointer with an address inside @A
+  br i1 %5, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret i32 0
+}
+
+;CHECK-LABEL: @_Z5test3v(
+;CHECK: load <4 x i32>
+;CHECK: shufflevector <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @_Z5test3v() nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %0, %1
+  %p.02 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @A, i64 0, i64 29), %0 ], [ %3, %1 ]  ; store pointer, walks down from A[29]
+  %b.01 = phi i32* [ getelementptr inbounds ([36 x i32], [36 x i32]* @B, i64 0, i64 5), %0 ], [ %4, %1 ]  ; load pointer, walks up from B[5]
+  %2 = load i32, i32* %b.01, align 4
+  store i32 %2, i32* %p.02, align 4  ; *p = *b
+  %3 = getelementptr inbounds i32, i32* %p.02, i64 -1
+  %4 = getelementptr inbounds i32, i32* %b.01, i64 1
+  %5 = icmp eq i32* %3, getelementptr ([36 x i32], [36 x i32]* @A, i64 128102389400760775, i64 3)  ; with 64-bit pointers 128102389400760775*144 + 12 wraps to -4, i.e. the i32 slot just before A[0]
+  br i1 %5, label %6, label %1
+
+; <label>:6                                       ; preds = %1
+  ret i32 0
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/read-only.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/read-only.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/read-only.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/read-only.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;CHECK-LABEL: @read_only_func(
+;CHECK: load <4 x i32>
+;CHECK: ret i32
+define i32 @read_only_func(i32* nocapture %A, i32* nocapture %B, i32 %n) nounwind uwtable readonly ssp {  ; sums A[i] + (B[i+13] << 1) for i in [0, n); the loop only loads, never stores
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]  ; running sum, starts at 0
+  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4
+  %4 = add nsw i64 %indvars.iv, 13  ; B is read at a fixed offset of 13 elements
+  %5 = getelementptr inbounds i32, i32* %B, i64 %4
+  %6 = load i32, i32* %5, align 4
+  %7 = shl i32 %6, 1
+  %8 = add i32 %3, %sum.02
+  %9 = add i32 %8, %7
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]  ; 0 when n <= 0, otherwise the final sum
+  ret i32 %sum.0.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/reduction-small-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/reduction-small-size.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/reduction-small-size.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/reduction-small-size.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,73 @@
+; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=1 -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @PR34687(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP17:%.*]], %[[LATCH]] ]
+; CHECK:       [[LATCH]]:
+; CHECK:         [[TMP13:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[TMP14:%.*]] = add nuw nsw <4 x i32> [[TMP13]], {{.*}}
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK:         [[TMP16:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i8>
+; CHECK-NEXT:    [[TMP17]] = zext <4 x i8> [[TMP16]] to <4 x i32>
+; CHECK-NEXT:    br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define i8 @PR34687(i1 %c, i32 %x, i32 %n) {  ; reduction whose value is masked to 8 bits each iteration and truncated to i8 after the loop
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %if.end ]
+  %r = phi i32 [ 0, %entry ], [ %r.next, %if.end ]  ; reduction phi, starts at 0
+  br i1 %c, label %if.then, label %if.end  ; loop-invariant condition guards the block below
+
+if.then:
+  %tmp0 = sdiv i32 undef, undef  ; result is unused; only makes the loop body conditional
+  br label %if.end
+
+if.end:
+  %tmp1 = and i32 %r, 255  ; keep only the low 8 bits of the running value
+  %i.next = add nsw i32 %i, 1
+  %r.next = add nuw nsw i32 %tmp1, %x
+  %cond = icmp eq i32 %i.next, %n
+  br i1 %cond, label %for.end, label %for.body
+
+for.end:
+  %tmp2 = phi i32 [ %r.next, %if.end ]
+  %tmp3 = trunc i32 %tmp2 to i8  ; only the low 8 bits are returned
+  ret i8 %tmp3
+}
+
+; CHECK-LABEL: @PR35734(
+; CHECK:       vector.ph:
+; CHECK:         [[TMP3:%.*]] = insertelement <4 x i32> zeroinitializer, i32 %y, i32 0
+; CHECK-NEXT:    br label %vector.body
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP3]], %vector.ph ], [ [[TMP9:%.*]], %vector.body ]
+; CHECK:         [[TMP5:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK:         [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i1>
+; CHECK-NEXT:    [[TMP9]] = sext <4 x i1> [[TMP8]] to <4 x i32>
+; CHECK-NEXT:    br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define i32 @PR35734(i32 %x, i32 %y) {  ; reduction that keeps only bit 0 of the running value, then adds -1
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %x, %entry ], [ %i.next, %for.body ]  ; counter starts at %x
+  %r = phi i32 [ %y, %entry ], [ %r.next, %for.body ]  ; reduction phi starts at %y
+  %tmp0 = and i32 %r, 1
+  %r.next = add i32 %tmp0, -1
+  %i.next = add nsw i32 %i, 1
+  %cond = icmp sgt i32 %i, 77  ; tests the pre-increment %i, not %i.next
+  br i1 %cond, label %for.end, label %for.body
+
+for.end:
+  %tmp1 = phi i32 [ %r.next, %for.body ]
+  ret i32 %tmp1
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/reduction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/reduction.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/reduction.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/reduction.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,580 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;CHECK-LABEL: @reduction_sum(
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+;CHECK: ret i32
+define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {  ; integer add reduction: sum += i + A[i] + B[i] for i in [0, n)
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]  ; running sum, starts at 0
+  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %5 = load i32, i32* %4, align 4
+  %6 = trunc i64 %indvars.iv to i32  ; the induction value itself is one of the addends
+  %7 = add i32 %sum.02, %6
+  %8 = add i32 %7, %3
+  %9 = add i32 %8, %5
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]  ; 0 when n <= 0, otherwise the final sum
+  ret i32 %sum.0.lcssa
+}
+
+;CHECK-LABEL: @reduction_prod(
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+;CHECK: ret i32
+define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {  ; integer multiply reduction: prod *= i * A[i] * B[i] for i in [0, n)
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ]  ; running product, starts at 1
+  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %5 = load i32, i32* %4, align 4
+  %6 = trunc i64 %indvars.iv to i32  ; the induction value itself is one of the factors
+  %7 = mul i32 %prod.02, %6
+  %8 = mul i32 %7, %3
+  %9 = mul i32 %8, %5
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ]  ; 1 when n <= 0, otherwise the final product
+  ret i32 %prod.0.lcssa
+}
+
+;CHECK-LABEL: @reduction_mix(
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: mul nsw <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+;CHECK: ret i32
+define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {  ; add reduction fed by a multiply: sum += i + A[i] * B[i] for i in [0, n)
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]  ; running sum, starts at 0
+  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %5 = load i32, i32* %4, align 4
+  %6 = mul nsw i32 %5, %3  ; the multiply is not part of the reduction chain; it only produces an addend
+  %7 = trunc i64 %indvars.iv to i32
+  %8 = add i32 %sum.02, %7
+  %9 = add i32 %8, %6
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]  ; 0 when n <= 0, otherwise the final sum
+  ret i32 %sum.0.lcssa
+}
+
+;CHECK-LABEL: @reduction_mul(
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: mul <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+;CHECK: ret i32
+define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {  ; multiply reduction fed by adds: acc *= A[i] + i + B[i] for i in [0, n)
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 19, %0 ]  ; running product despite the name; starts at 19
+  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %5 = load i32, i32* %4, align 4
+  %6 = trunc i64 %indvars.iv to i32
+  %7 = add i32 %3, %6
+  %8 = add i32 %7, %5
+  %9 = mul i32 %8, %sum.02  ; the only operation on the reduction chain
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]  ; NOTE(review): skip path yields 0 while the loop start value is 19 - confirm intended
+  ret i32 %sum.0.lcssa
+}
+
+;CHECK-LABEL: @start_at_non_zero(
+;CHECK: phi <4 x i32>
+;CHECK: <i32 120, i32 0, i32 0, i32 0>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+;CHECK: ret i32
+define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp {  ; add reduction with a non-zero start value: sum = 120 + sum of in[i] * coeff[i]; %out is not used
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ]  ; running sum, starts at 120
+  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %coeff, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, %sum.09
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ]  ; 120 when n <= 0, otherwise the final sum
+  ret i32 %sum.0.lcssa
+}
+
+;CHECK-LABEL: @reduction_and(
+;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
+;CHECK: and <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: and <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: and <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+;CHECK: ret i32
+define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {  ; bitwise-and reduction: result &= A[i] + B[i] for i in [0, n)
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]  ; starts at -1 (all bits set)
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %and = and i32 %add, %result.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ]  ; -1 when n <= 0, otherwise the final value
+  ret i32 %result.0.lcssa
+}
+
+;CHECK-LABEL: @reduction_or(
+;CHECK: or <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: or <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: or <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+;CHECK: ret i32
+define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {  ; bitwise-or reduction: result |= A[i] + B[i] for i in [0, n)
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]  ; starts at 0
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %or = or i32 %add, %result.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ]  ; 0 when n <= 0, otherwise the final value
+  ret i32 %result.0.lcssa
+}
+
+;CHECK-LABEL: @reduction_xor(
+;CHECK: xor <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: xor <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: xor <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+;CHECK: ret i32
+define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {  ; bitwise-xor reduction: result ^= A[i] + B[i] for i in [0, n)
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]  ; starts at 0
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %xor = xor i32 %add, %result.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]  ; 0 when n <= 0, otherwise the final value
+  ret i32 %result.0.lcssa
+}
+
+; In this test the reduction variable is on the RHS of the subtraction, so it cannot be vectorized as a reduction.
+;CHECK-LABEL: @reduction_sub_rhs(
+;CHECK-NOT: phi <4 x i32>
+;CHECK-NOT: sub nsw <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sub_rhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {  ; recurrence x = A[i] - x, with the carried value as the subtrahend
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]  ; carried value, starts at 0
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %sub = sub nsw i32 %0, %x.05  ; carried value is the second (right-hand) operand
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]  ; 0 when n <= 0, otherwise the final value
+  ret i32 %x.0.lcssa
+}
+
+
+; In this test the reduction variable is on the LHS and we can vectorize it.
+;CHECK-LABEL: @reduction_sub_lhs(
+;CHECK: phi <4 x i32>
+;CHECK: sub nsw <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {  ; recurrence x = x - A[i], with the carried value as the minuend
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]  ; carried value, starts at 0
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %sub = sub nsw i32 %x.05, %0  ; carried value is the first (left-hand) operand
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]  ; 0 when n <= 0, otherwise the final value
+  ret i32 %x.0.lcssa
+}
+
+; We can vectorize conditional reductions with multi-input phis.
+; CHECK: reduction_conditional
+; CHECK: fadd fast <4 x float>
+
+define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {  ; fast-math float sum starting at %S, updated on only some paths of a 128-iteration loop; %C is not used
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]  ; running sum
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
+  %1 = load float, float* %arrayidx2, align 4
+  %cmp3 = fcmp ogt float %0, %1
+  br i1 %cmp3, label %if.then, label %for.inc  ; sum is left unchanged unless A[i] > B[i]
+
+if.then:
+  %cmp6 = fcmp ogt float %1, 1.000000e+00
+  br i1 %cmp6, label %if.then8, label %if.else
+
+if.then8:
+  %add = fadd fast float %sum.033, %0  ; adds A[i]
+  br label %for.inc
+
+if.else:
+  %cmp14 = fcmp ogt float %0, 2.000000e+00
+  br i1 %cmp14, label %if.then16, label %for.inc
+
+if.then16:
+  %add19 = fadd fast float %sum.033, %1  ; adds B[i]
+  br label %for.inc
+
+for.inc:
+  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ]  ; every incoming value is the running sum or an fadd of it
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
+  ret float %sum.1.lcssa
+}
+
+; We can't vectorize reductions with phi inputs from outside the reduction.
+; CHECK: noreduction_phi
+; CHECK-NOT: fadd <4 x float>
+define float @noreduction_phi(float* %A, float* %B, float* %C, float %S) {  ; conditional float sum whose merge phi also takes the constant 0.0 on one path; %C is not used
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]  ; running value, starts at %S
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
+  %1 = load float, float* %arrayidx2, align 4
+  %cmp3 = fcmp ogt float %0, %1
+  br i1 %cmp3, label %if.then, label %for.inc
+
+if.then:
+  %cmp6 = fcmp ogt float %1, 1.000000e+00
+  br i1 %cmp6, label %if.then8, label %if.else
+
+if.then8:
+  %add = fadd fast float %sum.033, %0  ; adds A[i]
+  br label %for.inc
+
+if.else:
+  %cmp14 = fcmp ogt float %0, 2.000000e+00
+  br i1 %cmp14, label %if.then16, label %for.inc
+
+if.then16:
+  %add19 = fadd fast float %sum.033, %1  ; adds B[i]
+  br label %for.inc
+
+for.inc:
+  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ 0.000000e+00, %if.else ], [ %sum.033, %for.body ]  ; the %if.else edge supplies 0.0, a value from outside the sum chain
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
+  ret float %sum.1.lcssa
+}
+
+; We can't vectorize reductions that feed another header PHI.
+; CHECK: noredux_header_phi
+; CHECK-NOT: fadd <4 x float>
+
+define float @noredux_header_phi(float* %A, float* %B, float* %C, float %S)  {  ; two float sums where the first sum's update also feeds the second; only %B and %S are used
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum2.09 = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ]  ; second sum, starts at 0.0
+  %sum.08 = phi float [ %S, %entry ], [ %add, %for.body ]  ; first sum, starts at %S
+  %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd fast float %sum.08, %0
+  %add1 = fadd fast float %sum2.09, %add  ; %add is used here as well as by its own header phi
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 128
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  %add1.lcssa = phi float [ %add1, %for.body ]
+  %add.lcssa = phi float [ %add, %for.body ]
+  %add2 = fadd fast float %add.lcssa, %add1.lcssa  ; both final sums are combined for the return value
+  ret float %add2
+}
+
+
+; When vectorizing a reduction whose loop header phi value is used outside the
+; loop special care must be taken. Otherwise, the reduced value feeding into the
+; outside user misses a few iterations (VF-1) of the loop.
+; PR16522
+
+; CHECK-LABEL: @phivalueredux(
+; CHECK-NOT: x i32>
+
+define i32 @phivalueredux(i32 %p) {  ; 16-iteration loop that flips every bit of a carried value; returns the header phi, not the updated value
+entry:
+  br label %for.body
+
+for.body:
+  %t.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %p.addr.02 = phi i32 [ %p, %entry ], [ %xor, %for.body ]  ; carried value, starts at %p
+  %xor = xor i32 %p.addr.02, -1
+  %inc = add nsw i32 %t.03, 1
+  %exitcond = icmp eq i32 %inc, 16
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %p.addr.02  ; uses the phi value as it was at the start of the last iteration
+}
+
+; Don't vectorize a reduction value that is not the last in a reduction cycle. We
+; would lose iterations (VF-1) on the operations after that use.
+; PR17498
+
+; CHECK-LABEL: not_last_operation
+; CHECK-NOT: x i32>
+define i32 @not_last_operation(i32 %p, i32 %val) {  ; 22-iteration add chain whose intermediate value, not its last one, is used after the loop
+entry:
+  %tobool = icmp eq i32 %p, 0
+  br label %for.body
+
+for.body:
+  %inc613.1 = phi i32 [ 0, %entry ], [ %inc6.1, %for.body ]  ; loop counter
+  %inc511.1 = phi i32 [ %val, %entry ], [ %inc5.1, %for.body ]  ; carried value, starts at %val
+  %0 = zext i1 %tobool to i32
+  %inc4.1 = xor i32 %0, 1  ; 1 when %p != 0, otherwise 0
+  %inc511.1.inc4.1 = add nsw i32 %inc511.1, %inc4.1  ; first add in the chain
+  %inc5.1 = add nsw i32 %inc511.1.inc4.1, 1  ; last add in the chain; feeds the header phi
+  %inc6.1 = add nsw i32 %inc613.1, 1
+  %exitcond.1 = icmp eq i32 %inc6.1, 22
+  br i1 %exitcond.1, label %exit, label %for.body
+
+exit:
+  %inc.2 = add nsw i32 %inc511.1.inc4.1, 2  ; outside use of the first add rather than %inc5.1
+  ret i32 %inc.2
+}
+
+;CHECK-LABEL: @reduction_sum_multiuse(
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+;CHECK: add <4 x i32>
+;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+;CHECK: %sum.lcssa = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ]
+;CHECK: %sum.copy = phi i32 [ %[[SCALAR]], %.lr.ph ], [ %[[VECTOR]], %middle.block ]
+;CHECK: ret i32
+define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) {  ; same sum as reduction_sum, but the final value is read through two separate exit phis
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph.preheader, label %end
+.lr.ph.preheader:                                 ; preds = %0
+  br label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph.preheader, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ]  ; running sum, starts at 0
+  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %5 = load i32, i32* %4, align 4
+  %6 = trunc i64 %indvars.iv to i32
+  %7 = add i32 %sum.02, %6
+  %8 = add i32 %7, %3
+  %9 = add i32 %8, %5
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph
+  %sum.lcssa = phi i32 [ %9, %.lr.ph ]  ; first exit use of the final sum
+  %sum.copy = phi i32 [ %9, %.lr.ph ]  ; second exit use of the same value
+  br label %end
+
+end:
+  %f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ]
+  %f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ]
+  %final = add i32 %f1, %f2  ; twice the sum, or 0 when n <= 0
+  ret i32 %final
+}
+
+; This looks like a predicated reduction, but it is a reset of the reduction
+; variable. We cannot vectorize this.
+; CHECK-LABEL: reduction_reset(
+; CHECK-NOT: <4 x i32>
+define void @reduction_reset(i32 %N, i32* nocapture readonly %arrayA, i32* nocapture %arrayB) {  ; running sum over arrayA that a select forces back to 0; the result is stored into arrayB
+entry:
+  %c4 = icmp sgt i32 %N, 0
+  br i1 %c4, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader:                                 ; preds = %entry
+  %c5 = add i32 %N, -1
+  %wide.trip.count = zext i32 %N to i64
+  br label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
+  %indvars.iv = phi i64 [ 0, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]
+  %.017 = phi i32 [ 100, %.lr.ph.preheader ], [ %csel, %.lr.ph ]  ; carried value, starts at 100
+  %c6 = getelementptr inbounds i32, i32* %arrayA, i64 %indvars.iv
+  %c7 = load i32, i32* %c6, align 4
+  %c8 = icmp sgt i32 %c7, 0
+  %c9 = add nsw i32 %c7, %.017
+  %csel = select i1 %c8, i32 %c9, i32 0  ; keeps the sum when arrayA[i] > 0, otherwise replaces it with 0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+
+._crit_edge.loopexit:                             ; preds = %.lr.ph
+  %csel.lcssa = phi i32 [ %csel, %.lr.ph ]
+  %phitmp19 = sext i32 %c5 to i64
+  br label %._crit_edge
+
+._crit_edge:                                      ; preds = %._crit_edge.loopexit, %entry
+  %.015.lcssa = phi i64 [ -1, %entry ], [ %phitmp19, %._crit_edge.loopexit ]  ; store index: N-1 after the loop, -1 when the loop is skipped
+  %.0.lcssa = phi i32 [ 100, %entry ], [ %csel.lcssa, %._crit_edge.loopexit ]
+  %c10 = getelementptr inbounds i32, i32* %arrayB, i64 %.015.lcssa
+  store i32 %.0.lcssa, i32* %c10, align 4
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/remove_metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/remove_metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/remove_metadata.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/remove_metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s
+;
+; Check that llvm.loop.vectorize.* metadata is removed after vectorization.
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_nonforced_enable(
+; CHECK: store <2 x i32>
+define void @disable_nonforced_enable(i32* nocapture %a, i32 %n) {  ; stores i into a[i] for i in [0, n); the loop latch carries loop metadata !0
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0  ; !0 is the metadata node defined after this function
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.vectorize.some_property"}, !{!"llvm.loop.vectorize.enable", i32 1}}
+
+; CHECK-NOT: llvm.loop.vectorize.
+; CHECK: {!"llvm.loop.isvectorized", i32 1}
+; CHECK-NOT: llvm.loop.vectorize.

Added: llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,152 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure consecutive vector generates correct negative indices.
+; PR15882
+
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i64 %startval, %index
+; CHECK: %[[a0:.+]] = add i64 %offset.idx, 0
+; CHECK: %[[a4:.+]] = add i64 %offset.idx, -4
+
+define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) {
+entry:
+  br label %for.body
+
+for.body:
+  %add.i7 = phi i64 [ %startval, %entry ], [ %add.i, %for.body ]
+  %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
+  %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
+  %add.i = add i64 %add.i7, -1
+  %kind_.i = getelementptr inbounds i32, i32* %ptr, i64 %add.i
+  %tmp.i1 = load i32, i32* %kind_.i, align 4
+  %inc.redux = add i32 %tmp.i1, %redux5
+  %inc4 = add i32 %i.06, 1
+  %exitcond = icmp ne i32 %inc4, 1024
+  br i1 %exitcond, label %for.body, label %loopend
+
+loopend:
+  ret i32 %inc.redux
+}
+
+; CHECK-LABEL: @reverse_induction_i128(
+; CHECK: %index = phi i128 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i128 %startval, %index
+; CHECK: %[[a0:.+]] = add i128 %offset.idx, 0
+; CHECK: %[[a4:.+]] = add i128 %offset.idx, -4
+
+define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
+entry:
+  br label %for.body
+
+for.body:
+  %add.i7 = phi i128 [ %startval, %entry ], [ %add.i, %for.body ]
+  %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
+  %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
+  %add.i = add i128 %add.i7, -1
+  %kind_.i = getelementptr inbounds i32, i32* %ptr, i128 %add.i
+  %tmp.i1 = load i32, i32* %kind_.i, align 4
+  %inc.redux = add i32 %tmp.i1, %redux5
+  %inc4 = add i32 %i.06, 1
+  %exitcond = icmp ne i32 %inc4, 1024
+  br i1 %exitcond, label %for.body, label %loopend
+
+loopend:
+  ret i32 %inc.redux
+}
+
+; CHECK-LABEL: @reverse_induction_i16(
+; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i16 %startval, {{.*}}
+; CHECK: %[[a0:.+]] = add i16 %offset.idx, 0
+; CHECK: %[[a4:.+]] = add i16 %offset.idx, -4
+
+define i32 @reverse_induction_i16(i16 %startval, i32 * %ptr) {
+entry:
+  br label %for.body
+
+for.body:
+  %add.i7 = phi i16 [ %startval, %entry ], [ %add.i, %for.body ]
+  %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
+  %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
+  %add.i = add i16 %add.i7, -1
+  %kind_.i = getelementptr inbounds i32, i32* %ptr, i16 %add.i
+  %tmp.i1 = load i32, i32* %kind_.i, align 4
+  %inc.redux = add i32 %tmp.i1, %redux5
+  %inc4 = add i32 %i.06, 1
+  %exitcond = icmp ne i32 %inc4, 1024
+  br i1 %exitcond, label %for.body, label %loopend
+
+loopend:
+  ret i32 %inc.redux
+}
+
+
+@a = common global [1024 x i32] zeroinitializer, align 16
+
+; We incorrectly transformed this loop into an empty one because we left the
+; induction variable in i8 type and truncated the exit value 1024 to 0.
+; int a[1024];
+;
+; void fail() {
+;   int reverse_induction = 1023;
+;   unsigned char forward_induction = 0;
+;   while ((reverse_induction) >= 0) {
+;     forward_induction++;
+;     a[reverse_induction] = forward_induction;
+;     --reverse_induction;
+;   }
+; }
+
+; CHECK-LABEL: @reverse_forward_induction_i64_i8(
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i64 1023, %index
+; CHECK: %[[a0:.+]] = add i64 %offset.idx, 0
+; CHECK: %[[a4:.+]] = add i64 %offset.idx, -4
+
+define void @reverse_forward_induction_i64_i8() {
+entry:
+  br label %while.body
+
+while.body:
+  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]
+  %forward_induction.05 = phi i8 [ 0, %entry ], [ %inc, %while.body ]
+  %inc = add i8 %forward_induction.05, 1
+  %conv = zext i8 %inc to i32
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %0 = trunc i64 %indvars.iv to i32
+  %cmp = icmp sgt i32 %0, 0
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+  ret void
+}
+
+; CHECK-LABEL: @reverse_forward_induction_i64_i8_signed(
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i64 1023, %index
+; CHECK: %[[a0:.+]] = add i64 %offset.idx, 0
+; CHECK: %[[a4:.+]] = add i64 %offset.idx, -4
+
+define void @reverse_forward_induction_i64_i8_signed() {
+entry:
+  br label %while.body
+
+while.body:
+  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]
+  %forward_induction.05 = phi i8 [ -127, %entry ], [ %inc, %while.body ]
+  %inc = add i8 %forward_induction.05, 1
+  %conv = sext i8 %inc to i32
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %0 = trunc i64 %indvars.iv to i32
+  %cmp = icmp sgt i32 %0, 0
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/reverse_iter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/reverse_iter.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/reverse_iter.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/reverse_iter.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; PR15882: This test ensures that we do not produce wrapping arithmetic when
+; creating constant reverse step vectors.
+;
+; int foo(int n, int *A) {
+;   int sum;
+;   for (int i=n; i > 0; i--)
+;     sum += A[i*2];
+;   return sum;
+; }
+;
+
+;CHECK-LABEL: @foo(
+;CHECK:  <i32 0, i32 -1, i32 -2, i32 -3>
+;CHECK: ret
+define i32 @foo(i32 %n, i32* nocapture %A) {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0
+  %2 = sext i32 %n to i64
+  br label %3
+
+; <label>:3                                       ; preds = %.lr.ph, %3
+  %indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]
+  %sum.01 = phi i32 [ undef, %.lr.ph ], [ %9, %3 ]
+  %4 = trunc i64 %indvars.iv to i32
+  %5 = shl nsw i32 %4, 1
+  %6 = sext i32 %5 to i64
+  %7 = getelementptr inbounds i32, i32* %A, i64 %6
+  %8 = load i32, i32* %7, align 4
+  %9 = add nsw i32 %8, %sum.01
+  %indvars.iv.next = add i64 %indvars.iv, -1
+  %10 = trunc i64 %indvars.iv.next to i32
+  %11 = icmp sgt i32 %10, 0
+  br i1 %11, label %3, label %._crit_edge
+
+._crit_edge:                                      ; preds = %3, %0
+  %sum.0.lcssa = phi i32 [ undef, %0 ], [ %9, %3 ]
+  ret i32 %sum.0.lcssa
+}
+

Added: llvm/trunk/test/Transforms/LoopVectorize/runtime-check-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/runtime-check-address-space.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/runtime-check-address-space.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/runtime-check-address-space.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,221 @@
+; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+
+; Check vectorization that would ordinarily require a runtime bounds
+; check on the pointers when mixing address spaces. For now we cannot
+; assume address spaces do not alias, and we can't assume that
+; different pointers are directly comparable.
+;
+; These all test this basic loop for different combinations of address
+; spaces, and swapping in globals or adding noalias.
+;
+;void foo(int addrspace(N)* [noalias] a, int addrspace(M)* [noalias] b, int n)
+;{
+;    for (int i = 0; i < n; ++i)
+;    {
+;        a[i] = 3 * b[i];
+;    }
+;}
+
+; Artificial datalayout
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+@g_as1 = common addrspace(1) global [1024 x i32] zeroinitializer, align 16
+@q_as2 = common addrspace(2) global [1024 x i32] zeroinitializer, align 16
+
+; Both parameters are unidentified objects with the same address
+; space, so this should vectorize normally.
+define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
+; CHECK-LABEL: @foo(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %idxprom
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.02 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; Parameters are unidentified and different address spaces, so cannot vectorize.
+define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
+; CHECK-LABEL: @bar0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %idxprom
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.02 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; Swapped arguments should be the same
+define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
+; CHECK-LABEL: @bar1(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.02 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; We should still be able to vectorize with noalias even if the
+; address spaces are different.
+define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
+; CHECK-LABEL: @bar2(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %idxprom
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.02 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; Store to identified global with different address space. This isn't
+; generally safe and shouldn't be vectorized.
+define void @arst0(i32* %b, i32 %n) #0 {
+; CHECK-LABEL: @arst0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.02 to i64
+  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+; Load from identified global with different address space.
+; This isn't generally safe and shouldn't be vectorized.
+define void @arst1(i32* %b, i32 %n) #0 {
+; CHECK-LABEL: @arst1(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.02 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; Read and write to 2 identified globals in different address
+; spaces. This should be vectorized.
+define void @aoeu(i32 %n) #0 {
+; CHECK-LABEL: @aoeu(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  %cmp1 = icmp slt i32 0, %n
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
+  %0 = load i32, i32 addrspace(2)* %arrayidx, align 4
+  %mul = mul nsw i32 %0, 3
+  %idxprom1 = sext i32 %i.02 to i64
+  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
+  store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

Added: llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,132 @@
+; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+
+; Artificial datalayout
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+
+
+define void @add_ints_1_1_1(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 {
+; CHECK-LABEL: @add_ints_1_1_1(
+; CHECK: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %i.01
+  %1 = load i32, i32 addrspace(1)* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %i.01
+  store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add i64 %i.01, 1
+  %cmp = icmp ult i64 %inc, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @add_ints_as_1_0_0(i32 addrspace(1)* %a, i32* %b, i32* %c) #0 {
+; CHECK-LABEL: @add_ints_as_1_0_0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.01
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 %i.01
+  %1 = load i32, i32* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %i.01
+  store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
+  %inc = add i64 %i.01, 1
+  %cmp = icmp ult i64 %inc, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @add_ints_as_0_1_0(i32* %a, i32 addrspace(1)* %b, i32* %c) #0 {
+; CHECK-LABEL: @add_ints_as_0_1_0(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 %i.01
+  %1 = load i32, i32* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.01
+  store i32 %add, i32* %arrayidx2, align 4
+  %inc = add i64 %i.01, 1
+  %cmp = icmp ult i64 %inc, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @add_ints_as_0_1_1(i32* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #0 {
+; CHECK-LABEL: @add_ints_as_0_1_1(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %i.01
+  %1 = load i32, i32 addrspace(1)* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.01
+  store i32 %add, i32* %arrayidx2, align 4
+  %inc = add i64 %i.01, 1
+  %cmp = icmp ult i64 %inc, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @add_ints_as_0_1_2(i32* %a, i32 addrspace(1)* %b, i32 addrspace(2)* %c) #0 {
+; CHECK-LABEL: @add_ints_as_0_1_2(
+; CHECK-NOT: <4 x i32>
+; CHECK: ret
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(2)* %c, i64 %i.01
+  %1 = load i32, i32 addrspace(2)* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.01
+  store i32 %add, i32* %arrayidx2, align 4
+  %inc = add i64 %i.01, 1
+  %cmp = icmp ult i64 %inc, 200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

Added: llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;CHECK-LABEL: @add_ints(
+;CHECK: br
+;CHECK: getelementptr
+;CHECK-DAG: getelementptr
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: icmp ugt
+;CHECK-DAG: and
+;CHECK-DAG: and
+;CHECK: br
+;CHECK: ret
+define void @add_ints(i32* nocapture %A, i32* nocapture %B, i32* nocapture %C) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx4, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 200
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,179 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure we vectorize this loop:
+; int foo(float *a, float *b, int n) {
+;   for (int i=0; i<n; ++i)
+;     a[i] = b[i] * 3;
+; }
+
+define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0, !dbg !4
+; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]], !dbg !4
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1, !dbg !9
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg !9
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1, !dbg !9
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4, !dbg !9
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]], !dbg !9
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1, !dbg !9
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64, !dbg !9
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1, !dbg !9
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[TMP5]], !dbg !9
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr float, float* [[B:%.*]], i64 [[TMP5]], !dbg !9
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[A]], !dbg !9
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[B]], !dbg !9
+; CHECK-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]], !dbg !9
+; CHECK-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], !dbg !9
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588, !dbg !9
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]], !dbg !9
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], !dbg !9
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]], !dbg !9
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float* [[TMP7]] to <4 x float>*, !dbg !9
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 4, !dbg !9, !alias.scope !10
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, !dbg !9
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]], !dbg !9
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>*, !dbg !9
+; CHECK-NEXT:    store <4 x float> [[TMP9]], <4 x float>* [[TMP11]], align 4, !dbg !9, !alias.scope !13, !noalias !10
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4, !dbg !9
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg !9
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !dbg !9, !llvm.loop !15
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]], !dbg !9
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]], !dbg !9
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], !dbg !9
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]], !dbg !9
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg !9
+; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP13]], 3.000000e+00, !dbg !9
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]], !dbg !9
+; CHECK-NEXT:    store float [[MUL]], float* [[ARRAYIDX2]], align 4, !dbg !9
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1, !dbg !9
+; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32, !dbg !9
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]], !dbg !9
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !dbg !9, !llvm.loop !17
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]], !dbg !18
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 undef, !dbg !18
+;
+entry:
+  %cmp6 = icmp sgt i32 %n, 0, !dbg !6
+  br i1 %cmp6, label %for.body, label %for.end, !dbg !6
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ], !dbg !7
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv, !dbg !7
+  %0 = load float, float* %arrayidx, align 4, !dbg !7
+  %mul = fmul float %0, 3.000000e+00, !dbg !7
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv, !dbg !7
+  store float %mul, float* %arrayidx2, align 4, !dbg !7
+  %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !7
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !7
+  %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !7
+  br i1 %exitcond, label %for.end, label %for.body, !dbg !7
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i32 undef, !dbg !8
+}
+
+; Make sure that we try to vectorize loops with a runtime check if the
+; dependency check fails.
+
+; CHECK-LABEL: test_runtime_check
+; CHECK:      <4 x float>
+define void @test_runtime_check(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %ind.sum = add i64 %iv, %offset
+  %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
+  %l1 = load float, float* %arr.idx, align 4
+  %ind.sum2 = add i64 %iv, %offset2
+  %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
+  %l2 = load float, float* %arr.idx2, align 4
+  %m = fmul fast float %b, %l2
+  %ad = fadd fast float %l1, %m
+  store float %ad, float* %arr.idx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+  ret void
+}
+
+; Check we do not generate runtime checks if we found a known dependence preventing
+; vectorization. In this case, it is a read of c[i-1] followed by a write of c[i].
+; The runtime checks would always fail.
+
+; void test_runtime_check2(float *a, float b, unsigned offset, unsigned offset2, unsigned n, float *c) {
+;   for (unsigned i = 1; i < n; i++) {
+;     a[i+o1] += a[i+o2] + b;
+;     c[i] = c[i-1] + b;
+;   }
+; }
+;
+; CHECK-LABEL: test_runtime_check2
+; CHECK-NOT:      <4 x float>
+define void @test_runtime_check2(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n, float* %c) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %ind.sum = add i64 %iv, %offset
+  %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
+  %l1 = load float, float* %arr.idx, align 4
+  %ind.sum2 = add i64 %iv, %offset2
+  %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
+  %l2 = load float, float* %arr.idx2, align 4
+  %m = fmul fast float %b, %l2
+  %ad = fadd fast float %l1, %m
+  store float %ad, float* %arr.idx, align 4
+  %c.ind = add i64 %iv, -1
+  %c.idx = getelementptr inbounds float, float* %c, i64 %c.ind
+  %lc = load float, float* %c.idx, align 4
+  %vc = fadd float %lc, 1.0
+  %c.idx2 = getelementptr inbounds float, float* %c, i64 %iv
+  store float %vc, float* %c.idx2
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+  ret void
+}
+
+; CHECK: !9 = !DILocation(line: 101, column: 1, scope: !{{.*}})
+
+!llvm.module.flags = !{!0, !1}
+!llvm.dbg.cu = !{!9}
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+
+!2 = !{}
+!3 = !DISubroutineType(types: !2)
+!4 = !DIFile(filename: "test.cpp", directory: "/tmp")
+!5 = distinct !DISubprogram(name: "foo", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, unit: !9, retainedNodes: !2)
+!6 = !DILocation(line: 100, column: 1, scope: !5)
+!7 = !DILocation(line: 101, column: 1, scope: !5)
+!8 = !DILocation(line: 102, column: 1, scope: !5)
+!9 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+                             file: !10,
+                             isOptimized: true, flags: "-O2",
+                             splitDebugFilename: "abc.debug", emissionKind: 2)
+!10 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
+!11 = !{i32 2, !"Debug Info Version", i32 3}

Added: llvm/trunk/test/Transforms/LoopVectorize/runtime-limit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/runtime-limit.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/runtime-limit.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/runtime-limit.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,101 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=OVERRIDE
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -pragma-vectorize-memory-check-threshold=6 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; First loop produces diagnostic pass remark.
+;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
+; Second loop produces diagnostic analysis remark.
+;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations
+
+; First loop produces diagnostic pass remark.
+;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
+; Second loop produces diagnostic pass remark.
+;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
+
+; We are vectorizing with 6 runtime checks.
+;CHECK-LABEL: func1x6(
+;CHECK: <4 x i32>
+;CHECK: ret
+;OVERRIDE-LABEL: func1x6(
+;OVERRIDE: <4 x i32>
+;OVERRIDE: ret
+define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.016 = phi i64 [ 0, %entry ], [ %inc, %for.body ]   ; i = 0 on entry, then i + 1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.016
+  %0 = load i32, i32* %arrayidx, align 4                ; A[i]
+  %arrayidx1 = getelementptr inbounds i32, i32* %B, i64 %i.016
+  %1 = load i32, i32* %arrayidx1, align 4               ; B[i]
+  %add = add nsw i32 %1, %0
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %i.016
+  %2 = load i32, i32* %arrayidx2, align 4               ; C[i]
+  %add3 = add nsw i32 %add, %2
+  %arrayidx4 = getelementptr inbounds i32, i32* %E, i64 %i.016   ; %D is declared but never accessed
+  %3 = load i32, i32* %arrayidx4, align 4               ; E[i]
+  %add5 = add nsw i32 %add3, %3
+  %arrayidx6 = getelementptr inbounds i32, i32* %F, i64 %i.016
+  %4 = load i32, i32* %arrayidx6, align 4               ; F[i]
+  %add7 = add nsw i32 %add5, %4                         ; A[i] + B[i] + C[i] + E[i] + F[i]
+  %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %i.016
+  store i32 %add7, i32* %arrayidx8, align 4             ; out[i] = sum; no argument is noalias
+  %inc = add i64 %i.016, 1
+  %exitcond = icmp eq i64 %inc, 256                     ; constant trip count of 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 undef                                         ; return value is not meaningful
+}
+
+; We are not vectorizing with 12 runtime checks.
+;CHECK-LABEL: func2x6(
+;CHECK-NOT: <4 x i32>
+;CHECK: ret
+; We vectorize with 12 checks if a vectorization hint is provided.
+;OVERRIDE-LABEL: func2x6(
+;OVERRIDE: <4 x i32>
+;OVERRIDE: ret
+define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.037 = phi i64 [ 0, %entry ], [ %inc, %for.body ]   ; i = 0 on entry, then i + 1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.037
+  %0 = load i32, i32* %arrayidx, align 4                ; A[i]
+  %arrayidx1 = getelementptr inbounds i32, i32* %B, i64 %i.037
+  %1 = load i32, i32* %arrayidx1, align 4               ; B[i]
+  %add = add nsw i32 %1, %0
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %i.037
+  %2 = load i32, i32* %arrayidx2, align 4               ; C[i]
+  %add3 = add nsw i32 %add, %2
+  %arrayidx4 = getelementptr inbounds i32, i32* %E, i64 %i.037   ; %D is declared but never accessed
+  %3 = load i32, i32* %arrayidx4, align 4               ; E[i]
+  %add5 = add nsw i32 %add3, %3
+  %arrayidx6 = getelementptr inbounds i32, i32* %F, i64 %i.037
+  %4 = load i32, i32* %arrayidx6, align 4               ; F[i]
+  %add7 = add nsw i32 %add5, %4
+  %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %i.037
+  store i32 %add7, i32* %arrayidx8, align 4             ; out[i] = first sum
+  %5 = load i32, i32* %arrayidx, align 4                ; the same five addresses are loaded again after the store
+  %6 = load i32, i32* %arrayidx1, align 4
+  %add11 = add nsw i32 %6, %5
+  %7 = load i32, i32* %arrayidx2, align 4
+  %add13 = add nsw i32 %add11, %7
+  %8 = load i32, i32* %arrayidx4, align 4
+  %add15 = add nsw i32 %add13, %8
+  %9 = load i32, i32* %arrayidx6, align 4
+  %add17 = add nsw i32 %add15, %9
+  %arrayidx18 = getelementptr inbounds i32, i32* %out2, i64 %i.037
+  store i32 %add17, i32* %arrayidx18, align 4           ; out2[i] = second sum (second store stream)
+  %inc = add i64 %i.037, 1
+  %exitcond = icmp eq i64 %inc, 256                     ; constant trip count of 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 undef                                         ; return value is not meaningful
+}
+

Added: llvm/trunk/test/Transforms/LoopVectorize/safegep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/safegep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/safegep.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/safegep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1  < %s |  FileCheck %s
+target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+
+
+; We can vectorize this code because if the address computation would wrap then
+; a load from 0 would take place which is undefined behaviour in address space 0
+; according to LLVM IR semantics.
+
+; PR16592
+
+; CHECK-LABEL: @safe(
+; CHECK: <4 x float>
+
+define void @safe(float* %A, float* %B, float %K) {
+entry:
+  br label %"<bb 3>"
+
+"<bb 3>":
+  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]   ; i = 0 on entry, then i + 1
+  %pp3 = getelementptr float, float* %A, i32 %i_15      ; &A[i]; this GEP has no inbounds flag
+  %D.1396_10 = load float, float* %pp3, align 4         ; A[i]
+  %pp24 = getelementptr float, float* %B, i32 %i_15     ; &B[i]; no inbounds flag either
+  %D.1398_15 = load float, float* %pp24, align 4        ; B[i]
+  %D.1399_17 = fadd float %D.1398_15, %K
+  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
+  store float %D.1400_18, float* %pp3, align 4          ; A[i] = A[i] * (B[i] + K)
+  %i_19 = add nsw i32 %i_15, 1
+  %exitcond = icmp ne i32 %i_19, 64                     ; keep looping until i reaches 64
+  br i1 %exitcond, label %"<bb 3>", label %return
+
+return:
+  ret void
+}
+
+; In a non-default address space we don't have this rule.
+
+; CHECK-LABEL: @notsafe(
+; CHECK-NOT: <4 x float>
+
+define void @notsafe(float addrspace(5) * %A, float* %B, float %K) {
+entry:
+  br label %"<bb 3>"
+
+"<bb 3>":
+  %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]   ; i = 0 on entry, then i + 1
+  %pp3 = getelementptr float, float addrspace(5) * %A, i32 %i_15   ; &A[i] in address space 5, no inbounds flag
+  %D.1396_10 = load float, float addrspace(5) * %pp3, align 4      ; A[i]
+  %pp24 = getelementptr float, float* %B, i32 %i_15     ; &B[i] in the default address space
+  %D.1398_15 = load float, float* %pp24, align 4        ; B[i]
+  %D.1399_17 = fadd float %D.1398_15, %K
+  %D.1400_18 = fmul float %D.1396_10, %D.1399_17
+  store float %D.1400_18, float addrspace(5) * %pp3, align 4       ; A[i] = A[i] * (B[i] + K)
+  %i_19 = add nsw i32 %i_15, 1
+  %exitcond = icmp ne i32 %i_19, 64                     ; keep looping until i reaches 64
+  br i1 %exitcond, label %"<bb 3>", label %return
+
+return:
+  ret void
+}
+
+

Added: llvm/trunk/test/Transforms/LoopVectorize/same-base-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/same-base-access.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/same-base-access.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/same-base-access.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,107 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; This is kernel11 from "LivermoreLoops". We can't vectorize it because we
+; access both x[k] and x[k-1].
+;
+; void kernel11(double *x, double *y, int n) {
+;   for ( int k=1 ; k<n ; k++ )
+;     x[k] = x[k-1] + y[k];
+; }
+
+; CHECK-LABEL: @kernel11(
+; CHECK-NOT: <4 x double>
+; CHECK: ret
+define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
+  %1 = alloca double*, align 8                          ; stack slot for x
+  %2 = alloca double*, align 8                          ; stack slot for y
+  %3 = alloca i32, align 4                              ; stack slot for n
+  %k = alloca i32, align 4                              ; stack slot for the loop counter
+  store double* %x, double** %1, align 8
+  store double* %y, double** %2, align 8
+  store i32 %n, i32* %3, align 4
+  store i32 1, i32* %k, align 4                         ; k = 1
+  br label %4
+
+; <label>:4                                       ; preds = %25, %0
+  %5 = load i32, i32* %k, align 4
+  %6 = load i32, i32* %3, align 4
+  %7 = icmp slt i32 %5, %6                              ; k < n
+  br i1 %7, label %8, label %28
+
+; <label>:8                                       ; preds = %4
+  %9 = load i32, i32* %k, align 4
+  %10 = sub nsw i32 %9, 1                               ; k - 1
+  %11 = sext i32 %10 to i64
+  %12 = load double*, double** %1, align 8
+  %13 = getelementptr inbounds double, double* %12, i64 %11
+  %14 = load double, double* %13, align 8               ; x[k-1]
+  %15 = load i32, i32* %k, align 4
+  %16 = sext i32 %15 to i64
+  %17 = load double*, double** %2, align 8
+  %18 = getelementptr inbounds double, double* %17, i64 %16
+  %19 = load double, double* %18, align 8               ; y[k]
+  %20 = fadd double %14, %19
+  %21 = load i32, i32* %k, align 4
+  %22 = sext i32 %21 to i64
+  %23 = load double*, double** %1, align 8
+  %24 = getelementptr inbounds double, double* %23, i64 %22
+  store double %20, double* %24, align 8                ; x[k] = x[k-1] + y[k]
+  br label %25
+
+; <label>:25                                      ; preds = %8
+  %26 = load i32, i32* %k, align 4
+  %27 = add nsw i32 %26, 1
+  store i32 %27, i32* %k, align 4                       ; k++
+  br label %4
+
+; <label>:28                                      ; preds = %4
+  ret i32 0
+}
+
+
+; A[i*7] is scalarized, and the different scalars can in theory wrap
+; around and overwrite other scalar elements. However we can still
+; vectorize because we can version the loop to avoid this case.
+; 
+; void foo(int *a) {
+;   for (int i=0; i<256; ++i) {
+;     int x = a[i*7];
+;     if (x>3)
+;       x = x*x+x*4;
+;     a[i*7] = x+3;
+;   }
+; }
+
+; CHECK-LABEL: @func2(
+; CHECK: <4 x i32>
+; CHECK: ret
+define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %7, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %7 ]   ; i = 0 on entry, then i + 1
+  %2 = mul nsw i64 %indvars.iv, 7                       ; i * 7
+  %3 = getelementptr inbounds i32, i32* %a, i64 %2      ; &a[i*7], used for both the load and the store
+  %4 = load i32, i32* %3, align 4                       ; x = a[i*7]
+  %5 = icmp sgt i32 %4, 3                               ; x > 3
+  br i1 %5, label %6, label %7
+
+; <label>:6                                       ; preds = %1
+  %tmp = add i32 %4, 4
+  %tmp1 = mul i32 %tmp, %4                              ; (x + 4) * x, i.e. x*x + x*4
+  br label %7
+
+; <label>:7                                       ; preds = %6, %1
+  %x.0 = phi i32 [ %tmp1, %6 ], [ %4, %1 ]              ; updated x if x > 3, otherwise the loaded x
+  %8 = add nsw i32 %x.0, 3
+  store i32 %8, i32* %3, align 4                        ; a[i*7] = x + 3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32      ; exit test is done on the 32-bit truncation
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %9, label %1
+
+; <label>:9                                       ; preds = %7
+  ret i32 0
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/scalar-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/scalar-select.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/scalar-select.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/scalar-select.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@a = common global [2048 x i32] zeroinitializer, align 16   ; store target in @example1
+@b = common global [2048 x i32] zeroinitializer, align 16   ; first load source in @example1
+@c = common global [2048 x i32] zeroinitializer, align 16   ; second load source in @example1
+
+;CHECK-LABEL: @example1(
+;CHECK: load <4 x i32>
+; make sure that we have a scalar condition and a vector operand.
+;CHECK: select i1 %cond, <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1(i1 %cond) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]   ; i = 0 on entry, then i + 1
+  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4                       ; b[i]
+  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32, i32* %4, align 4                       ; c[i]
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %sel = select i1 %cond, i32 %6, i32 zeroinitializer   ; %cond is a function argument, so it does not change inside the loop
+  store i32 %sel, i32* %7, align 4                      ; a[i] = cond ? b[i] + c[i] : 0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32      ; exit test is done on the 32-bit truncation
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/scalar_after_vectorization.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/scalar_after_vectorization.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/scalar_after_vectorization.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,74 @@
+; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=2 -loop-vectorize -instcombine -S | FileCheck %s
+; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=2 -loop-vectorize -S | FileCheck %s --check-prefix=NO-IC
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @scalar_after_vectorization_0
+;
+; CHECK: vector.body:
+; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK:   %offset.idx = or i64 %index, 1
+; CHECK:   %[[T2:.+]] = add nuw nsw i64 %offset.idx, %tmp0
+; CHECK:   %[[T3:.+]] = sub nsw i64 %[[T2]], %x
+; CHECK:   %[[T4:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T3]]
+; CHECK:   %[[T5:.+]] = bitcast i32* %[[T4]] to <4 x i32>*
+; CHECK:   load <4 x i32>, <4 x i32>* %[[T5]], align 4
+; CHECK:   %[[T6:.+]] = getelementptr inbounds i32, i32* %[[T4]], i64 4
+; CHECK:   %[[T7:.+]] = bitcast i32* %[[T6]] to <4 x i32>*
+; CHECK:   load <4 x i32>, <4 x i32>* %[[T7]], align 4
+; CHECK:   br {{.*}}, label %middle.block, label %vector.body
+;
+; NO-IC-LABEL: @scalar_after_vectorization_0
+;
+; NO-IC: vector.body:
+; NO-IC:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; NO-IC:   %offset.idx = add i64 1, %index
+; NO-IC:   %[[T2:.+]] = add i64 %offset.idx, 0
+; NO-IC:   %[[T3:.+]] = add i64 %offset.idx, 4
+; NO-IC:   %[[T4:.+]] = add nuw nsw i64 %[[T2]], %tmp0
+; NO-IC:   %[[T5:.+]] = add nuw nsw i64 %[[T3]], %tmp0
+; NO-IC:   %[[T6:.+]] = sub nsw i64 %[[T4]], %x
+; NO-IC:   %[[T7:.+]] = sub nsw i64 %[[T5]], %x
+; NO-IC:   %[[T8:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T6]]
+; NO-IC:   %[[T9:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T7]]
+; NO-IC:   %[[T10:.+]] = getelementptr inbounds i32, i32* %[[T8]], i32 0
+; NO-IC:   %[[T11:.+]] = bitcast i32* %[[T10]] to <4 x i32>*
+; NO-IC:   load <4 x i32>, <4 x i32>* %[[T11]], align 4
+; NO-IC:   %[[T12:.+]] = getelementptr inbounds i32, i32* %[[T8]], i32 4
+; NO-IC:   %[[T13:.+]] = bitcast i32* %[[T12]] to <4 x i32>*
+; NO-IC:   load <4 x i32>, <4 x i32>* %[[T13]], align 4
+; NO-IC:   br {{.*}}, label %middle.block, label %vector.body
+;
+define void @scalar_after_vectorization_0(i32* noalias %a, i32* noalias %b, i64 %x, i64 %y) {
+
+outer.ph:
+  br label %outer.body
+
+outer.body:
+  %i = phi i64 [ 1, %outer.ph ], [ %i.next, %inner.end ]   ; outer index starts at 1
+  %tmp0 = mul nuw nsw i64 %i, %x                        ; i * x, fixed for the whole inner loop
+  br label %inner.ph
+
+inner.ph:
+  br label %inner.body
+
+inner.body:
+  %j = phi i64 [ 1, %inner.ph ], [ %j.next, %inner.body ]   ; inner index starts at 1
+  %tmp1 = add nuw nsw i64 %j, %tmp0                     ; j + i*x
+  %tmp2 = sub nsw i64 %tmp1, %x                         ; j + i*x - x
+  %tmp3 = getelementptr inbounds i32, i32* %a, i64 %tmp2
+  %tmp4 = load i32, i32* %tmp3, align 4                 ; a[j + i*x - x]
+  %tmp5 = getelementptr inbounds i32, i32* %b, i64 %tmp1
+  store i32 %tmp4, i32* %tmp5, align 4                  ; b[j + i*x] = a[j + i*x - x]
+  %j.next = add i64 %j, 1
+  %cond.j = icmp slt i64 %j.next, %y                    ; continue while j + 1 < y
+  br i1 %cond.j, label %inner.body, label %inner.end
+
+inner.end:
+  %i.next = add i64 %i, 1
+  %cond.i = icmp slt i64 %i.next, %y                    ; continue while i + 1 < y
+  br i1 %cond.i, label %outer.body, label %outer.end
+
+outer.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/scev-exitlim-crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/scev-exitlim-crash.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/scev-exitlim-crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,113 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=8 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@b = common global i32 0, align 4       ; loaded and compared with 0 to steer the loops in @fn1
+@f = common global i32 0, align 4       ; receives %add10 in @fn1
+@a = common global i32 0, align 4       ; base pointer of the GEPs in @fn1
+@d = common global i32* null, align 8   ; receives the pointer computed from @a in @fn1
+@e = common global i32* null, align 8   ; pointer loaded once in @fn1 and used as the copy destination
+@c = common global i32 0, align 4       ; incremented once per %for.inc19 in @fn1
+
+; CHECK-LABEL: @fn1(
+; CHECK: vector.body
+define void @fn1() #0 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %i.0 = phi i32 [ undef, %entry ], [ %inc, %for.cond ]   ; start value is undef
+  %cmp = icmp slt i32 %i.0, 0
+  %call = tail call i32 @fn2(double fadd (double fsub (double undef, double undef), double 1.000000e+00)) #2
+  %inc = add nsw i32 %i.0, 1
+  br i1 %cmp, label %for.cond, label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond
+  %call.lcssa = phi i32 [ %call, %for.cond ]            ; result of the last @fn2 call, used as the outer bound
+  %cmp514 = icmp sgt i32 %call.lcssa, 0
+  br i1 %cmp514, label %for.cond7.preheader.lr.ph, label %for.end26
+
+for.cond7.preheader.lr.ph:                        ; preds = %for.cond4.preheader
+  %0 = load i32*, i32** @e, align 8, !tbaa !4           ; destination pointer, loaded once before the loops
+  br label %for.cond7.preheader
+
+for.cond7.preheader:                              ; preds = %for.cond7.preheader.lr.ph, %for.inc23
+  %y.017 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %inc24, %for.inc23 ]
+  %i.116 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %i.2.lcssa, %for.inc23 ]
+  %n.015 = phi i32 [ undef, %for.cond7.preheader.lr.ph ], [ %inc25, %for.inc23 ]   ; start value is undef
+  %1 = load i32, i32* @b, align 4, !tbaa !5
+  %tobool11 = icmp eq i32 %1, 0
+  br i1 %tobool11, label %for.inc23, label %for.body8.lr.ph
+
+for.body8.lr.ph:                                  ; preds = %for.cond7.preheader
+  %add9 = add i32 %n.015, 1
+  br label %for.body8
+
+for.body8:                                        ; preds = %for.body8.lr.ph, %for.inc19
+  %indvars.iv19 = phi i64 [ 0, %for.body8.lr.ph ], [ %indvars.iv.next20, %for.inc19 ]
+  %i.213 = phi i32 [ %i.116, %for.body8.lr.ph ], [ 0, %for.inc19 ]
+  %2 = trunc i64 %indvars.iv19 to i32
+  %add10 = add i32 %add9, %2
+  store i32 %add10, i32* @f, align 4, !tbaa !5
+  %idx.ext = sext i32 %add10 to i64
+  %add.ptr = getelementptr inbounds i32, i32* @a, i64 %idx.ext
+  %tobool129 = icmp eq i32 %i.213, 0
+  br i1 %tobool129, label %for.inc19, label %for.body13.lr.ph   ; the innermost loop is skipped when %i.213 is 0
+
+for.body13.lr.ph:                                 ; preds = %for.body8
+  %3 = sext i32 %i.213 to i64                           ; start value of the innermost induction variable
+  br label %for.body13
+
+for.body13:                                       ; preds = %for.body13.lr.ph, %for.body13
+  %indvars.iv = phi i64 [ %3, %for.body13.lr.ph ], [ %indvars.iv.next, %for.body13 ]
+  %add.ptr.sum = add i64 %idx.ext, %indvars.iv
+  %arrayidx = getelementptr inbounds i32, i32* @a, i64 %add.ptr.sum
+  %4 = load i32, i32* %arrayidx, align 4, !tbaa !5      ; read through @a at offset %idx.ext + iv
+  %arrayidx15 = getelementptr inbounds i32, i32* %0, i64 %indvars.iv
+  store i32 %4, i32* %arrayidx15, align 4, !tbaa !5     ; write through the pointer loaded from @e at offset iv
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %5 = trunc i64 %indvars.iv.next to i32
+  %tobool12 = icmp eq i32 %5, 0                         ; exit once the truncated next index is 0
+  br i1 %tobool12, label %for.cond11.for.inc19_crit_edge, label %for.body13
+
+for.cond11.for.inc19_crit_edge:                   ; preds = %for.body13
+  br label %for.inc19
+
+for.inc19:                                        ; preds = %for.cond11.for.inc19_crit_edge, %for.body8
+  %6 = load i32, i32* @c, align 4, !tbaa !5
+  %inc20 = add nsw i32 %6, 1
+  store i32 %inc20, i32* @c, align 4, !tbaa !5          ; @c = @c + 1
+  %indvars.iv.next20 = add i64 %indvars.iv19, 1
+  %7 = load i32, i32* @b, align 4, !tbaa !5             ; @b is re-read on every iteration
+  %tobool = icmp eq i32 %7, 0
+  br i1 %tobool, label %for.cond7.for.inc23_crit_edge, label %for.body8
+
+for.cond7.for.inc23_crit_edge:                    ; preds = %for.inc19
+  %add.ptr.lcssa = phi i32* [ %add.ptr, %for.inc19 ]
+  store i32* %add.ptr.lcssa, i32** @d, align 8, !tbaa !4
+  br label %for.inc23
+
+for.inc23:                                        ; preds = %for.cond7.for.inc23_crit_edge, %for.cond7.preheader
+  %i.2.lcssa = phi i32 [ 0, %for.cond7.for.inc23_crit_edge ], [ %i.116, %for.cond7.preheader ]
+  %inc24 = add nsw i32 %y.017, 1
+  %inc25 = add nsw i32 %n.015, 1
+  %exitcond = icmp ne i32 %inc24, %call.lcssa           ; outer loop runs until y reaches the @fn2 result
+  br i1 %exitcond, label %for.cond7.preheader, label %for.cond4.for.end26_crit_edge
+
+for.cond4.for.end26_crit_edge:                    ; preds = %for.inc23
+  br label %for.end26
+
+for.end26:                                        ; preds = %for.cond4.for.end26_crit_edge, %for.cond4.preheader
+  ret void
+}
+declare i32 @fn2(double) #1   ; external callee of the first loop in @fn1; NOTE(review): that call site names #2, which has no definition in this file
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}   ; root of the TBAA tree
+!3 = !{!"double", !1}
+!4 = !{!0, !0, i64 0}   ; access tag on "int"; attached to the i32* accesses of @e and @d in @fn1
+!5 = !{!3, !3, i64 0}   ; access tag on "double"; NOTE(review): attached to the i32 loads/stores in @fn1 - confirm this is intended

Added: llvm/trunk/test/Transforms/LoopVectorize/simple-unroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/simple-unroll.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/simple-unroll.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/simple-unroll.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@a = common global [2048 x i32] zeroinitializer, align 16   ; array updated in place by @inc
+
+; This is the loop.
+;  for (i=0; i<n; i++){
+;    a[i] += i;
+;  }
+;CHECK-LABEL: @inc(
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %n, 0                               ; the loop is entered only when n > 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]   ; i = 0 on entry, then i + 1
+  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4                       ; a[i]
+  %4 = trunc i64 %indvars.iv to i32                     ; i as a 32-bit value
+  %5 = add nsw i32 %3, %4
+  store i32 %5, i32* %2, align 4                        ; a[i] += i
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n              ; exit when the truncated next index equals n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopVectorize/skip-iterations.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/skip-iterations.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/skip-iterations.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/skip-iterations.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,181 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; tests skipping iterations within a VF through break/continue/gotos.
+
+; The main difficulty in vectorizing these loops in test1,test2 and test3 is
+; safely speculating that the widened load of A[i] should not fault if the
+; scalarized loop does not fault. For example, the
+; original load in the scalar loop may not fault, but the last iteration of the
+; vectorized load can fault (if it crosses a page boundary for example).
+; This last vector iteration is where *one* of the
+; scalar iterations lead to the early exit.
+
+; int test(int *A, int Length) {
+;   for (int i = 0; i < Length; i++) {
+;     if (A[i] > 10.0) goto end;
+;     A[i] = 0;
+;   }
+; end:
+;   return 0;
+; }
+; CHECK-LABEL: test1(
+; CHECK-NOT: <4 x i32>
+define i32 @test1(i32* nocapture %A, i32 %Length) {
+entry:
+  %cmp8 = icmp sgt i32 %Length, 0                       ; the loop is entered only when Length > 0
+  br i1 %cmp8, label %for.body.preheader, label %end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %if.else
+  %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !15     ; A[i]
+  %cmp1 = icmp sgt i32 %0, 10
+  br i1 %cmp1, label %end.loopexit, label %if.else      ; early exit as soon as A[i] > 10
+
+if.else:                                          ; preds = %for.body
+  store i32 0, i32* %arrayidx, align 4, !tbaa !15       ; A[i] = 0, reached only when the early exit was not taken
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, %Length                       ; continue while i + 1 < Length
+  br i1 %cmp, label %for.body, label %end.loopexit
+
+end.loopexit:                                     ; preds = %if.else, %for.body
+  br label %end
+
+end:                                              ; preds = %end.loopexit, %entry
+  ret i32 0
+}
+
+; We don't use anything from within the loop at the early exit path
+; so we do not need to know which iteration caused the early exit path.
+; bool test2(int *A, int Length, int K) {
+;   for (int i = 0; i < Length; i++) {
+;     if (A[i] == K) return true;
+;   }
+;   return false;
+; }
+; TODO: Today we do not vectorize this, but we could teach the vectorizer, once
+; the hard part of proving/speculating A[i:VF - 1] loads does not fault is handled by the
+; compiler/hardware.
+
+; CHECK-LABEL: test2(
+; CHECK-NOT: <4 x i32>
+define i32 @test2(i32* nocapture %A, i32 %Length, i32 %K) {
+entry:
+  %cmp8 = icmp sgt i32 %Length, 0                       ; the loop is entered only when Length > 0
+  br i1 %cmp8, label %for.body.preheader, label %end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %if.else
+  %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %ld = load i32, i32* %arrayidx, align 4               ; A[i]
+  %cmp1 = icmp eq i32 %ld, %K
+  br i1 %cmp1, label %end.loopexit, label %if.else      ; early exit as soon as A[i] == K
+
+if.else:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %trunc = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %trunc, %Length                   ; continue while i + 1 < Length
+  br i1 %cmp, label %for.body, label %end.loopexit
+
+end.loopexit:                                     ; preds = %if.else, %for.body
+  %result.lcssa = phi i32 [ 1, %for.body ], [ 0, %if.else ]   ; 1 via the early exit, 0 when the loop ran to the end
+  br label %end
+
+end:                                              ; preds = %end.loopexit, %entry
+  %result = phi i32 [ %result.lcssa, %end.loopexit ], [ 0, %entry ]   ; 0 when the loop was never entered
+  ret i32 %result
+}
+
+; We use the IV in the early exit
+; so we need to know which iteration caused the early exit path.
+; int test3(int *A, int Length, int K) {
+;   for (int i = 0; i < Length; i++) {
+;     if (A[i] == K) return i;
+;   }
+;   return -1;
+; }
+; TODO: Today we do not vectorize this, but we could teach the vectorizer (once
+; we handle the speculation safety of the widened load).
+; CHECK-LABEL: test3(
+; CHECK-NOT: <4 x i32>
+define i32 @test3(i32* nocapture %A, i32 %Length, i32 %K) {
+entry:
+  %cmp8 = icmp sgt i32 %Length, 0                       ; the loop is entered only when Length > 0
+  br i1 %cmp8, label %for.body.preheader, label %end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %if.else
+  %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %ld = load i32, i32* %arrayidx, align 4               ; A[i]
+  %cmp1 = icmp eq i32 %ld, %K
+  br i1 %cmp1, label %end.loopexit, label %if.else      ; early exit as soon as A[i] == K
+
+if.else:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %trunc = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %trunc, %Length                   ; continue while i + 1 < Length
+  br i1 %cmp, label %for.body, label %end.loopexit
+
+end.loopexit:                                     ; preds = %if.else, %for.body
+  %result.lcssa = phi i64 [ %indvars.iv, %for.body ], [ -1, %if.else ]   ; matching index via the early exit, -1 otherwise
+  %res.trunc = trunc i64 %result.lcssa to i32
+  br label %end
+
+end:                                              ; preds = %end.loopexit, %entry
+  %result = phi i32 [ %res.trunc, %end.loopexit ], [ -1, %entry ]   ; -1 when the loop was never entered
+  ret i32 %result
+}
+
+; void test4(int *A, int Length, int K, int J) {
+;   for (int i = 0; i < Length; i++) {
+;     if (A[i] == K) continue;
+;     A[i] = J;
+;   }
+; }
+; For this test, we vectorize and generate predicated stores to A[i].
+; CHECK-LABEL: test4(
+; CHECK: <4 x i32>
+define void @test4(i32* nocapture %A, i32 %Length, i32 %K, i32 %J) {
+entry:
+  %cmp8 = icmp sgt i32 %Length, 0                       ; the loop is entered only when Length > 0
+  br i1 %cmp8, label %for.body.preheader, label %end.loopexit
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %latch
+  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1     ; next index is computed here and consumed in %latch
+  %trunc = trunc i64 %indvars.iv.next to i32
+  %ld = load i32, i32* %arrayidx, align 4               ; A[i]
+  %cmp1 = icmp eq i32 %ld, %K
+  br i1 %cmp1, label %latch, label %if.else             ; A[i] == K skips the store ("continue")
+
+if.else:                                          ; preds = %for.body
+  store i32 %J, i32* %arrayidx, align 4                 ; A[i] = J, executed only when A[i] != K
+  br label %latch
+
+latch:                                            ; preds = %if.else, %for.body
+  %cmp = icmp slt i32 %trunc, %Length                   ; continue while i + 1 < Length
+  br i1 %cmp, label %for.body, label %end.loopexit
+
+end.loopexit:                                     ; preds = %latch, %entry
+  ret void
+}
+!15 = !{!16, !16, i64 0}   ; access tag attached to the A[i] load and store in @test1
+!16 = !{!"int", !17, i64 0}
+!17 = !{!"omnipotent char", !18, i64 0}
+!18 = !{!"Simple C/C++ TBAA"}   ; root of the TBAA tree

Added: llvm/trunk/test/Transforms/LoopVectorize/small-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/small-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/small-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/small-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,57 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@a = common global [2048 x i32] zeroinitializer, align 16 ; destination array of the loops in this file
+@b = common global [2048 x i32] zeroinitializer, align 16 ; first source array
+@c = common global [2048 x i32] zeroinitializer, align 16 ; second source array
+
+;CHECK-LABEL: @example1(
+;CHECK: load <4 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {    ; a[i] = b[i] + c[i] for i in [0, 8)
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32, i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %6, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 8  ;   <-----  A really small trip count
+  br i1 %exitcond, label %8, label %1      ;           w/o scalar iteration overhead.
+
+; <label>:8                                       ; preds = %1
+  ret void
+}
+
+;CHECK-LABEL: @bound1(
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret void
+define void @bound1(i32 %k) nounwind uwtable ssp { ; a[i] = b[i] + c[i]; stops after at most 8 iterations, or earlier when i + 1 == k
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32, i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %6, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %large = icmp sge i32 %lftr.wideiv, 8           ; constant upper bound on the trip count
+  %exitcond = icmp eq i32 %lftr.wideiv, %k        ; run-time exit that may fire before the bound
+  %realexit = or i1 %large, %exitcond             ; leave the loop when either condition holds
+  br i1 %realexit, label %8, label %1
+
+; <label>:8                                       ; preds = %1
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/start-non-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/start-non-zero.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/start-non-zero.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/start-non-zero.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;CHECK-LABEL: @start_at_nonzero(
+;CHECK: mul nuw <4 x i32>
+;CHECK: ret i32
+define i32 @start_at_nonzero(i32* nocapture %a, i32 %start, i32 %end) nounwind uwtable ssp { ; a[i] *= 333 for i in [start, end)
+entry:
+  %cmp3 = icmp slt i32 %start, %end               ; the loop is skipped when start >= end
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = sext i32 %start to i64                     ; initial value of the 64-bit induction variable
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; starts at %start, not 0
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx, align 4
+  %mul = mul nuw i32 %1, 333                      ; the CHECK above expects this as "mul nuw <4 x i32>"
+  store i32 %mul, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %2 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %2, %end
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i32 4
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/store-shuffle-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/store-shuffle-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/store-shuffle-bug.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/store-shuffle-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; RUN: opt -S -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@uf = common global [100 x i32] zeroinitializer, align 16 ; read and written by @t
+@xi = common global [100 x i32] zeroinitializer, align 16 ; read by @t
+@q = common global [100 x i32] zeroinitializer, align 16  ; read by @t
+
+; PR16455
+
+
+; Due to a bug in the way we handled reverse induction stores we would generate
+; one shuffle too many.
+
+define void @t()  {
+entry:
+  br label %for.body
+
+; CHECK-LABEL: @t(
+; CHECK: vector.body:
+; CHECK: [[VAR1:%[a-zA-Z0-9.]+]] = load <4 x i32>
+; CHECK: [[VAR2:%[a-zA-Z0-9.]+]] = load <4 x i32>
+; CHECK: [[VAR3:%[a-zA-Z0-9]+]] = add nsw <4 x i32> [[VAR2]], [[VAR1]]
+; CHECK: store <4 x i32> [[VAR3]]
+; CHECK: [[VAR4:%[a-zA-Z0-9.]+]] = load <4 x i32>
+; CHECK: add nsw <4 x i32> [[VAR3]], [[VAR4]]
+; CHECK-NOT: shufflevector
+
+for.body:
+  %indvars.iv = phi i64 [ 93, %entry ], [ %indvars.iv.next, %for.body ] ; counts down from 93
+  %0 = add i64 %indvars.iv, 1                     ; element index i = iv + 1
+  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @uf, i64 0, i64 %0
+  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* @xi, i64 0, i64 %0
+  %1 = load i32, i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %add4 = add nsw i32 %2, %1
+  store i32 %add4, i32* %arrayidx, align 4        ; first store: uf[i] = uf[i] + xi[i]
+  %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* @q, i64 0, i64 %0
+  %3 = load i32, i32* %arrayidx7, align 4
+  %add8 = add nsw i32 %add4, %3
+  store i32 %add8, i32* %arrayidx, align 4        ; second store to the same uf[i], now including q[i]
+  %indvars.iv.next = add i64 %indvars.iv, -1      ; reverse induction: step is -1
+  %4 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp ugt i32 %4, 2                       ; keep looping while the decremented counter is > 2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/struct_access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/struct_access.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/struct_access.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/struct_access.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,87 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.coordinate = type { i32, i32 }
+
+; Make sure that we don't generate a wide load when accessing the struct.
+; struct coordinate {
+;  int x;
+;  int y;
+; };
+;
+;
+; int foo(struct coordinate *A, int n) {
+;
+;   int sum = 0;
+;   for (int i = 0; i < n; ++i)
+;     sum += A[i].x;
+;
+;   return sum;
+; }
+
+;CHECK-LABEL: @foo(
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret
+define i32 @foo(%struct.coordinate* nocapture %A, i32 %n) nounwind uwtable readonly ssp { ; returns the sum of A[i].x for i in [0, n)
+entry:
+  %cmp4 = icmp sgt i32 %n, 0                      ; n <= 0 returns 0 without entering the loop
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] ; running sum
+  %x = getelementptr inbounds %struct.coordinate, %struct.coordinate* %A, i64 %indvars.iv, i32 0 ; &A[i].x; elements are { i32, i32 }, so loaded fields are not adjacent
+  %0 = load i32, i32* %x, align 4
+  %add = add nsw i32 %0, %sum.05
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+%struct.lit = type { i32 }
+
+; Verify that we still vectorize the access if the struct has the same size as
+; the loaded element.
+; struct lit {
+;  int x;
+; };
+;
+;
+; int bar(struct lit *A, int n) {
+;
+;   int sum = 0;
+;   for (int i = 0; i < n; ++i)
+;     sum += A[i].x;
+;
+;   return sum;
+; }
+
+;CHECK-LABEL: @bar(
+;CHECK: load <4 x i32>
+;CHECK: ret
+define i32 @bar(%struct.lit* nocapture %A, i32 %n) nounwind uwtable readonly ssp { ; returns the sum of A[i].x for i in [0, n)
+entry:
+  %cmp4 = icmp sgt i32 %n, 0                      ; n <= 0 returns 0 without entering the loop
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] ; running sum
+  %x = getelementptr inbounds %struct.lit, %struct.lit* %A, i64 %indvars.iv, i32 0 ; &A[i].x; elements are { i32 }, so loaded fields are adjacent
+  %0 = load i32, i32* %x, align 4
+  %add = add nsw i32 %0, %sum.05
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/tbaa-nodep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/tbaa-nodep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/tbaa-nodep.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/tbaa-nodep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,101 @@
+; RUN: opt < %s  -tbaa -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 { ; a[i] = (int)b[i] for i in [0, 1600)
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4, !tbaa !0 ; !0 is the "float" TBAA access tag
+  %conv = fptosi float %0 to i32
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4 ; !4 is the "int" TBAA access tag
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+
+; TBAA partitions the accesses in this loop, so it can be vectorized without
+; runtime checks.
+
+; CHECK-LABEL: @test1
+; CHECK: entry:
+; CHECK-NEXT: br label %vector.body
+; CHECK: vector.body:
+
+; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
+
+; CHECK: ret i32 0
+
+; CHECK-NOTBAA-LABEL: @test1
+; CHECK-NOTBAA: icmp ugt i32*
+
+; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
+
+; CHECK-NOTBAA: ret i32 0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test2(i32* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c) #0 { ; c[i] = b[i] * (float)a[i] for i in [0, 1600)
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4, !tbaa !0 ; !0 is the "float" TBAA access tag
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4, !tbaa !4 ; !4 is the "int" TBAA access tag
+  %conv = sitofp i32 %1 to float
+  %mul = fmul float %0, %conv
+  %arrayidx4 = getelementptr inbounds float, float* %c, i64 %indvars.iv
+  store float %mul, float* %arrayidx4, align 4, !tbaa !0 ; same tag as the load from %b, so TBAA cannot separate %b from %c
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+
+; This test is like the first, except here there is still one runtime check
+; required. Without TBAA, however, two checks are required.
+
+; CHECK-LABEL: @test2
+; CHECK: icmp ugt float*
+; CHECK: icmp ugt float*
+; CHECK-NOT: icmp ugt i32*
+
+; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
+
+; CHECK: ret i32 0
+
+; CHECK-NOTBAA-LABEL: @test2
+; CHECK-NOTBAA: icmp ugt float*
+; CHECK-NOTBAA: icmp ugt float*
+; CHECK-NOTBAA-DAG: icmp ugt float*
+; CHECK-NOTBAA-DAG: icmp ugt i32*
+
+; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
+
+; CHECK-NOTBAA: ret i32 0
+}
+
+attributes #0 = { nounwind uwtable }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !2, i64 0}
+

Added: llvm/trunk/test/Transforms/LoopVectorize/tripcount.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/tripcount.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/tripcount.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/tripcount.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,211 @@
+; This test verifies that the loop vectorizer does not vectorize low trip count
+; loops that require runtime checks (trip count is computed with profile info).
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s
+
+target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
+
+@tab = common global [32 x i8] zeroinitializer, align 1 ; byte table updated in place by every loop in this file
+
+define i32 @foo_low_trip_count1(i32 %bound) {
+; Simple loop with low tripcount. Should not be vectorized.
+
+; CHECK-LABEL: @foo_low_trip_count1(
+; CHECK-NOT: <{{[0-9]+}} x i8>
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1                ; tab[i] = (tab[i] == 0) ? 2 : 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %i.08, %bound           ; tests the pre-increment counter
+  br i1 %exitcond, label %for.end, label %for.body, !prof !1 ; !1 = branch_weights 100 (exit), 0 (back edge)
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
+; The loop has the same invocation count as the function, but has a low
+; trip count per invocation and is not worth vectorizing.
+
+; CHECK-LABEL: @foo_low_trip_count2(
+; CHECK-NOT: <{{[0-9]+}} x i8>
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1                ; tab[i] = (tab[i] == 0) ? 2 : 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %i.08, %bound           ; tests the pre-increment counter
+  br i1 %exitcond, label %for.end, label %for.body, !prof !1 ; !1 = branch_weights 100 (exit), 0 (back edge)
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
+; The loop has a low invocation count compared to the function invocation count,
+; but has a high trip count per invocation. Vectorize it.
+
+; CHECK-LABEL: @foo_low_trip_count3(
+; CHECK: vector.body:
+
+entry:
+  br i1 %cond, label %for.preheader, label %for.end, !prof !2 ; !2 = branch_weights 10 (enter loop), 90 (skip it)
+
+for.preheader:                                    ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.preheader
+  %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1                ; tab[i] = (tab[i] == 0) ? 2 : 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %i.08, %bound           ; tests the pre-increment counter
+  br i1 %exitcond, label %for.end, label %for.body, !prof !3 ; !3 = branch_weights 10 (exit), 10000 (back edge)
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i32 0
+}
+
+define i32 @foo_low_trip_count_icmp_sgt(i32 %bound) {
+; Simple loop with low tripcount and inequality test for exit.
+; Should not be vectorized.
+
+; CHECK-LABEL: @foo_low_trip_count_icmp_sgt(
+; CHECK-NOT: <{{[0-9]+}} x i8>
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1                ; tab[i] = (tab[i] == 0) ? 2 : 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 1
+  %exitcond = icmp sgt i32 %i.08, %bound          ; signed inequality exit test on the pre-increment counter
+  br i1 %exitcond, label %for.end, label %for.body, !prof !1 ; !1 = branch_weights 100 (exit), 0 (back edge)
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+define i32 @const_low_trip_count() {
+; Simple loop with constant, small trip count and no profiling info.
+
+; CHECK-LABEL: @const_low_trip_count
+; CHECK-NOT: <{{[0-9]+}} x i8>
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1                ; tab[i] = (tab[i] == 0) ? 2 : 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 1
+  %exitcond = icmp slt i32 %i.08, 2               ; despite the name, true means "continue": loop while i < 2
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+define i32 @const_large_trip_count() {
+; Simple loop with constant large trip count and no profiling info.
+
+; CHECK-LABEL: @const_large_trip_count
+; CHECK: <{{[0-9]+}} x i8>
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1                ; tab[i] = (tab[i] == 0) ? 2 : 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 1
+  %exitcond = icmp slt i32 %i.08, 1000            ; despite the name, true means "continue": loop while i < 1000
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+define i32 @const_small_trip_count_step() {
+; Simple loop with static, small trip count and no profiling info.
+
+; CHECK-LABEL: @const_small_trip_count_step
+; CHECK-NOT: <{{[0-9]+}} x i8>
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1                ; tab[i] = (tab[i] == 0) ? 2 : 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 5                     ; the counter advances by 5 per iteration
+  %exitcond = icmp slt i32 %i.08, 10              ; despite the name, true means "continue": loop while i < 10
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+define i32 @const_trip_over_profile() {
+; constant trip count takes precedence over profile data
+
+; CHECK-LABEL: @const_trip_over_profile
+; CHECK: <{{[0-9]+}} x i8>
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1                ; tab[i] = (tab[i] == 0) ? 2 : 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 1
+  %exitcond = icmp slt i32 %i.08, 1000            ; despite the name, true means "continue": loop while i < 1000
+  br i1 %exitcond, label %for.body, label %for.end, !prof !1 ; profile weights are attached, yet the CHECK above still expects vector code
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+!0 = !{!"function_entry_count", i64 100}
+!1 = !{!"branch_weights", i32 100, i32 0}
+!2 = !{!"branch_weights", i32 10, i32 90}
+!3 = !{!"branch_weights", i32 10, i32 10000}

Added: llvm/trunk/test/Transforms/LoopVectorize/undef-inst-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/undef-inst-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/undef-inst-bug.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/undef-inst-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; We used to fail on this loop because we did not properly handle the loop
+; invariant instruction anchored in the loop when used as a getelementptr index.
+; We would use the index from the original loop resulting in a use not dominated
+; by the definition.
+
+; PR16452
+
+; Verify that we don't miscompile this loop.
+
+; CHECK-LABEL: @t(
+; CHECK: <4 x i32>
+
+define void @t() {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv17 = phi i64 [ %indvars.next, %for.body ], [ 128, %entry ] ; counts down from 128
+
+  ; Loop invariant anchored in loop.
+  %idxprom21 = zext i32 undef to i64
+
+  %arrayidx23 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* undef, i64 0, i64 %idxprom21, i64 %indvars.iv17 ; the in-loop invariant is used as a GEP index
+  store i32 undef, i32* %arrayidx23, align 4
+  %indvars.next= add i64 %indvars.iv17, -1
+  %0 = trunc i64 %indvars.next to i32
+  %cmp15 = icmp ugt i32 %0, undef
+  br i1 %cmp15, label %for.body, label %loopexit
+
+loopexit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/unroll-novec-memcheck-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/unroll-novec-memcheck-metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/unroll-novec-memcheck-metadata.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/unroll-novec-memcheck-metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S | FileCheck --enable-var-scope %s
+
+; Make sure we attach memcheck metadata to scalarized memory operations even if
+; we're only unrolling.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: vector.memcheck:
+; CHECK-LABEL: vector.body:
+; CHECK: load i32, {{.*}} !alias.scope ![[$MD1:[0-9]+]]
+; CHECK-LABEL: middle.block:
+; CHECK-DAG: ![[$MD1]] = !{![[MD2:[0-9]+]]}
+; CHECK-DAG: ![[MD2]] = distinct !{![[MD2]], ![[MD3:[0-9]+]]}
+; CHECK-DAG: ![[MD3]] = distinct !{![[MD3]], !"LVerDomain"}
+
+; Function Attrs: norecurse nounwind uwtable
+define void @test(i32* nocapture readonly %a, i32* nocapture %b) local_unnamed_addr #0 { ; b[i] = a[i] + 77 for i in [0, 10000)
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4          ; the CHECK lines expect !alias.scope on the loads emitted for this
+  %add = add nsw i32 %0, 77
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = { norecurse nounwind uwtable }

Added: llvm/trunk/test/Transforms/LoopVectorize/unroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/unroll.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/unroll.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/unroll.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; This test makes sure that the loop is not unrolled by the vectorizer if the
+; computed VF equals 1.
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+
+; Make sure there are no geps being merged.
+; CHECK-LABEL: @foo(
+; CHECK: getelementptr
+; CHECK-NOT: getelementptr
+
+@N = common global i32 0, align 4                  ; loop bound, loaded once in @foo's entry block
+@a = common global [1000 x i32] zeroinitializer, align 16 ; array stored to by @foo
+
+define void @foo() #0 {                            ; a[7 * i] = 3 for i in [0, N)
+entry:
+  %0 = load i32, i32* @N, align 4
+  %cmp5 = icmp sgt i32 %0, 0                      ; the loop is skipped when N <= 0
+  br i1 %cmp5, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %conv = sext i32 %0 to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %mul = mul nuw nsw i64 %i.06, 7                 ; element index advances by 7 per iteration
+  %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @a, i64 0, i64 %mul
+  store i32 3, i32* %arrayidx, align 4
+  %inc = add nuw nsw i64 %i.06, 1
+  %cmp = icmp slt i64 %inc, %conv
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/unroll_novec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/unroll_novec.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/unroll_novec.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/unroll_novec.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,48 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-interleave=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@a = common global [2048 x i32] zeroinitializer, align 16 ; array updated in place by @inc
+
+; This is the loop.
+;  for (i=0; i<n; i++){
+;    a[i] += i;
+;  }
+;CHECK-LABEL: @inc(
+;CHECK: load i32, i32*
+;CHECK: load i32, i32*
+;CHECK: load i32, i32*
+;CHECK: load i32, i32*
+;CHECK-NOT: load i32, i32*
+;CHECK: add nsw i32
+;CHECK: add nsw i32
+;CHECK: add nsw i32
+;CHECK: add nsw i32
+;CHECK-NOT: add nsw i32
+;CHECK: store i32
+;CHECK: store i32
+;CHECK: store i32
+;CHECK: store i32
+;CHECK-NOT: store i32
+;CHECK: add i64 %{{.*}}, 4
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp { ; a[i] += i for i in [0, n)
+  %1 = icmp sgt i32 %n, 0                         ; the loop is skipped when n <= 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+  %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %3 = load i32, i32* %2, align 4
+  %4 = trunc i64 %indvars.iv to i32               ; the loop index itself is the value added
+  %5 = add nsw i32 %3, %4
+  store i32 %5, i32* %2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/LoopVectorize/unsafe-dep-remark.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/unsafe-dep-remark.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/unsafe-dep-remark.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/unsafe-dep-remark.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,73 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s
+
+; ModuleID = '/tmp/kk.c'
+source_filename = "/tmp/kk.c"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+;     1	void success (char *A, char *B, char *C, char *D, char *E, int N) {
+;     2	  for(int i = 0; i < N; i++) {
+;     3	    A[i + 1] = A[i] + B[i];
+;     4	    C[i] = D[i] * E[i];
+;     5	  }
+;     6	}
+
+; CHECK: remark: /tmp/kk.c:3:16: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+
+define void @success(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) !dbg !6 {
+entry:
+  %cmp28 = icmp sgt i32 %N, 0, !dbg !8
+  br i1 %cmp28, label %for.body, label %for.cond.cleanup, !dbg !9
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !11
+  %0 = load i8, i8* %arrayidx, align 1, !dbg !11, !tbaa !12 ; reads A[i]; !11 is line 3, column 16, the location named in the expected remark
+  %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !15
+  %1 = load i8, i8* %arrayidx2, align 1, !dbg !15, !tbaa !12
+  %add = add i8 %1, %0, !dbg !16
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9
+  %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !17
+  store i8 %add, i8* %arrayidx7, align 1, !dbg !18, !tbaa !12 ; writes A[i + 1], which the next iteration loads as A[i]
+  %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !19
+  %2 = load i8, i8* %arrayidx9, align 1, !dbg !19, !tbaa !12
+  %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !20
+  %3 = load i8, i8* %arrayidx12, align 1, !dbg !20, !tbaa !12
+  %mul = mul i8 %3, %2, !dbg !21
+  %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !22
+  store i8 %mul, i8* %arrayidx16, align 1, !dbg !23, !tbaa !12 ; C[i] = D[i] * E[i]
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9
+  %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void, !dbg !10
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "/tmp/kk.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"PIC Level", i32 2}
+!5 = !{!"clang version 3.9.0 "}
+!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 2, column: 20, scope: !6)
+!9 = !DILocation(line: 2, column: 3, scope: !6)
+!10 = !DILocation(line: 6, column: 1, scope: !6)
+!11 = !DILocation(line: 3, column: 16, scope: !6)
+!12 = !{!13, !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 3, column: 23, scope: !6)
+!16 = !DILocation(line: 3, column: 21, scope: !6)
+!17 = !DILocation(line: 3, column: 5, scope: !6)
+!18 = !DILocation(line: 3, column: 14, scope: !6)
+!19 = !DILocation(line: 4, column: 12, scope: !6)
+!20 = !DILocation(line: 4, column: 19, scope: !6)
+!21 = !DILocation(line: 4, column: 17, scope: !6)
+!22 = !DILocation(line: 4, column: 5, scope: !6)
+!23 = !DILocation(line: 4, column: 10, scope: !6)

Added: llvm/trunk/test/Transforms/LoopVectorize/unsized-pointee-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/unsized-pointee-crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/unsized-pointee-crash.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/unsized-pointee-crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @fn1
+define void @fn1() { ; loop that advances a function-typed pointer by one byte per iteration
+entry:
+  br label %for.body
+
+for.body:
+  %b.05 = phi i32 (...)* [ undef, %entry ], [ %1, %for.body ] ; loop-carried pointer whose pointee is a function type
+  %a.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; iteration counter, starts at 0
+  %0 = bitcast i32 (...)* %b.05 to i8*
+  %add.ptr = getelementptr i8, i8* %0, i64 1 ; the one-byte step is done through an i8* view
+  %1 = bitcast i8* %add.ptr to i32 (...)*
+; CHECK:      %[[cst:.*]] = bitcast i32 (...)* {{.*}} to i8*
+; CHECK-NEXT: %[[gep:.*]] = getelementptr i8, i8* %[[cst]], i64 1
+  %inc = add nsw i32 %a.04, 1
+  %exitcond = icmp eq i32 %a.04, 63 ; compares the pre-increment counter, so the body runs 64 times
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/value-ptr-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/value-ptr-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/value-ptr-bug.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/value-ptr-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; PR16073
+
+; Because we were caching value pointers across a function call that could RAUW
+; we would generate an undefined value store below:
+; SCEVExpander::expandCodeFor would change a value (the start value of an
+; induction) that we cached in the induction variable list.
+
+; CHECK-LABEL: @test_vh(
+; CHECK-NOT: store <4 x i8> undef
+
+define void @test_vh(i32* %ptr265, i32* %ptr266, i32 %sub267) { ; two consecutive loops; the second one starts from values produced by the first
+entry:
+  br label %loop
+
+loop:
+  %inc = phi i32 [ %sub267, %entry ], [ %add, %loop] ; induction variable, initial value %sub267
+  %ext.inc = sext i32 %inc to i64
+  %add.ptr265 = getelementptr inbounds i32, i32* %ptr265, i64 %ext.inc
+  %add.ptr266 = getelementptr inbounds i32, i32* %ptr266, i64 %ext.inc
+  %add = add i32 %inc, 9 ; step of 9 per iteration
+  %cmp = icmp slt i32 %add, 140
+  br i1 %cmp, label %block1, label %loop ; leaves the loop as soon as %add is below 140 (signed)
+
+block1:
+  %sub267.lcssa = phi i32 [ %add, %loop ] ; single-input phis forwarding the first loop's last values
+  %add.ptr266.lcssa = phi i32* [ %add.ptr266, %loop ]
+  %add.ptr265.lcssa = phi i32* [ %add.ptr265, %loop ]
+  %tmp29 = bitcast i32* %add.ptr265.lcssa to i8* ; destination start, viewed as bytes
+  %tmp30 = bitcast i32* %add.ptr266.lcssa to i8* ; source start, viewed as bytes
+  br label %do.body272
+
+do.body272:
+  %row_width.5 = phi i32 [ %sub267.lcssa, %block1 ], [ %dec, %do.body272 ] ; remaining count, seeded from the first loop's %add
+  %sp.4 = phi i8* [ %tmp30, %block1 ], [ %incdec.ptr273, %do.body272 ] ; source cursor
+  %dp.addr.4 = phi i8* [ %tmp29, %block1 ], [ %incdec.ptr274, %do.body272 ] ; destination cursor
+  %incdec.ptr273 = getelementptr inbounds i8, i8* %sp.4, i64 1
+  %tmp31 = load i8, i8* %sp.4, align 1
+  %incdec.ptr274 = getelementptr inbounds i8, i8* %dp.addr.4, i64 1
+  store i8 %tmp31, i8* %dp.addr.4, align 1 ; copy one byte from source to destination
+  %dec = add i32 %row_width.5, -1
+  %cmp276 = icmp eq i32 %dec, 0
+  br i1 %cmp276, label %loop.exit, label %do.body272 ; stop once the remaining count reaches 0
+
+loop.exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,211 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
+; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Given a loop with an induction variable which is being
+; truncated/extended using casts that had been proven to
+; be redundant under a runtime test, we want to make sure
+; that these casts do not get vectorized/scalarized/widened.
+; This is the case for inductions whose SCEV expression is
+; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
+; can be a result of the IR sequences we check below.
+; 
+; See also pr30654.
+;
+
+; Case1: Check the following induction pattern:
+;
+;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+;  %sext = shl i32 %p.09, 24
+;  %conv = ashr exact i32 %sext, 24
+;  %add = add nsw i32 %conv, %step
+; 
+; This is the case in the following code:
+;
+; void doit1(int n, int step) {
+;   int i;
+;   char p = 0;
+;   for (i = 0; i < n; i++) {
+;      a[i] = p;
+;      p = p + step;
+;   }
+; }
+;
+; The "ExtTrunc" IR sequence here is:
+;  "%sext = shl i32 %p.09, 24"
+;  "%conv = ashr exact i32 %sext, 24"
+; We check that it does not appear in the vector loop body, whether
+; we vectorize or scalarize the induction.
+; In the case of widened induction, this means that the induction phi
+; is directly used, without shl/ashr on the way.
+
+; VF8-LABEL: @doit1
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i32>
+; VF8: store <8 x i32> %vec.ind
+; VF8: middle.block:            
+
+; VF1-LABEL: @doit1
+; VF1: vector.body:
+; VF1-NOT: %{{.*}} = shl i32
+; VF1: middle.block:            
+
+@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16
+
+define void @doit1(i32 %n, i32 %step) { ; stores the sign-extended low 8 bits of a running sum into @a, one element per iteration
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end ; the loop is skipped unless %n > 0
+
+for.body.lr.ph:
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; array index
+  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; running sum, starts at 0
+  %sext = shl i32 %p.09, 24 ; shl 24 followed by ashr 24 sign-extends the low 8 bits of %p.09
+  %conv = ashr exact i32 %sext, 24
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step ; the next value is built from the narrowed value, not from %p.09
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; Case2: Another variant of the above pattern is where the induction variable
+; is used only for address computation (i.e. it is a GEP index) and therefore
+; the induction is not vectorized but rather only the step is widened. 
+;
+; This is the case in the following code, where the induction variable 'w_ix' 
+; is only used to access the array 'in':
+;
+; void doit2(int *in, int *out, size_t size, size_t step)
+; {
+;    int w_ix = 0;
+;    for (size_t offset = 0; offset < size; ++offset)
+;     {
+;        int w = in[w_ix];
+;        out[offset] = w;
+;        w_ix += step;
+;     }
+; }
+;
+; The "ExtTrunc" IR sequence here is similar to the previous case:
+;  "%sext = shl i64 %w_ix.012, 32
+;  %idxprom = ashr exact i64 %sext, 32"
+; We check that it does not appear in the vector loop body, whether
+; we widen or scalarize the induction.
+; In the case of widened induction, this means that the induction phi
+; is directly used, without shl/ashr on the way.
+
+; VF8-LABEL: @doit2
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i64> 
+; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit2
+; VF1: vector.body:
+; VF1-NOT: %{{.*}} = shl i64
+; VF1: middle.block:
+;
+
+define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step)  { ; copies in[w_ix] to out[offset], with w_ix advancing by %step each iteration
+entry:
+  %cmp9 = icmp eq i64 %size, 0
+  br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph ; nothing to do when %size is 0
+
+for.body.lr.ph:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; read index, starts at 0
+  %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] ; write index, starts at 0
+  %sext = shl i64 %w_ix.011, 32 ; shl 32 followed by ashr 32 sign-extends the low 32 bits of the read index
+  %idxprom = ashr exact i64 %sext, 32
+  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
+  store i32 %0, i32* %arrayidx1, align 4
+  %add = add i64 %idxprom, %step ; the next read index is built from the narrowed value
+  %inc = add nuw i64 %offset.010, 1
+  %exitcond = icmp eq i64 %inc, %size
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; Case3: Lastly, check also the following induction pattern:
+; 
+;  %p.09 = phi i32 [ %val0, %scalar.ph ], [ %add, %for.body ]
+;  %conv = and i32 %p.09, 255
+;  %add = add nsw i32 %conv, %step
+; 
+; This is the case in the following code:
+;
+; int a[N];
+; void doit3(int n, int step) {
+;   int i;
+;   unsigned char p = 0;
+;   for (i = 0; i < n; i++) {
+;      a[i] = p;
+;      p = p + step;
+;   }
+; }
+; 
+; The "ExtTrunc" IR sequence here is:
+;  "%conv = and i32 %p.09, 255".
+; We check that it does not appear in the vector loop body, whether
+; we vectorize or scalarize the induction.
+
+; VF8-LABEL: @doit3
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i32>
+; VF8: store <8 x i32> %vec.ind
+; VF8: middle.block:            
+
+; VF1-LABEL: @doit3
+; VF1: vector.body:
+; VF1-NOT: %{{.*}} = and i32 
+; VF1: middle.block:            
+
+define void @doit3(i32 %n, i32 %step) { ; stores the low 8 bits (zero-extended) of a running sum into @a, one element per iteration
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end ; the loop is skipped unless %n > 0
+
+for.body.lr.ph:
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; array index
+  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; running sum, starts at 0
+  %conv = and i32 %p.09, 255 ; keep only the low 8 bits
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step ; the next value is built from the masked value, not from %p.09
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/vect.omp.persistence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/vect.omp.persistence.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/vect.omp.persistence.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/vect.omp.persistence.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -O2 -force-vector-interleave=2 -force-vector-width=4 -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: LV: Checking a loop in "foo"
+; CHECK: LV: Loop hints: force=enabled
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Ensure that "llvm.loop.vectorize.enable" metadata was not lost even
+; if loop was not rotated.
+;
+; See http://reviews.llvm.org/D3348 for details.
+;
+; CHECK-LABEL: @foo
+; CHECK: !llvm.loop !0
+; CHECK: !0 = distinct !{!0, !1}
+; CHECK: !1 = !{!"llvm.loop.vectorize.enable", i1 true}
+define i32 @foo(i32 %a) { ; counting loop whose back-edge branch carries loop metadata !0; always returns 0
+entry:
+  br label %loop_cond
+
+loop_cond:
+  %indx = phi i32 [ 1, %entry ], [ %inc, %loop_inc ] ; counter, starts at 1
+  %cmp = icmp ne i32 %indx, %a
+  br i1 %cmp, label %return, label %loop_inc ; exit is taken when %indx differs from %a
+
+loop_inc:
+  %inc = add i32 %indx, 1
+  br label %loop_cond, !llvm.loop !0 ; the metadata is attached to this unconditional back-edge
+
+return:
+  ret i32 0
+}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}

Added: llvm/trunk/test/Transforms/LoopVectorize/vect.stats.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/vect.stats.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/vect.stats.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/vect.stats.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,58 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+;
+; We have 2 loops, one of them is vectorizable and the second one is not.
+;
+
+; CHECK: 2 loop-vectorize               - Number of loops analyzed for vectorization
+; CHECK: 1 loop-vectorize               - Number of loops vectorized
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @vectorized(float* nocapture %a, i64 %size) { ; squares a[i] in place for every index the loop visits
+entry:
+  %cmp1 = icmp sle i64 %size, 0
+  %cmp21 = icmp sgt i64 0, %size
+  %or.cond = or i1 %cmp1, %cmp21 ; true whenever %size <= 0
+  br i1 %or.cond, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; index, starts at 0
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv2
+  %0 = load float, float* %arrayidx, align 4
+  %mul = fmul float %0, %0
+  store float %mul, float* %arrayidx, align 4 ; the load and the store use the same address
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
+  %cmp2 = icmp sgt i64 %indvars.iv.next, %size
+  br i1 %cmp2, label %for.end, label %for.body ; exit once the incremented index exceeds %size (signed)
+
+for.end:                                          ; preds = %entry, %for.body
+  ret void
+}
+
+define void @not_vectorized(float* nocapture %a, i64 %size) { ; writes a[i] = a[i-5] * a[i+2]; NOTE(review): presumably this cross-iteration dependence is what prevents vectorization - confirm
+entry:
+  %cmp1 = icmp sle i64 %size, 0
+  %cmp21 = icmp sgt i64 0, %size
+  %or.cond = or i1 %cmp1, %cmp21 ; true whenever %size <= 0
+  br i1 %or.cond, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; index, starts at 0
+  %0 = add nsw i64 %indvars.iv2, -5
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %0
+  %1 = load float, float* %arrayidx, align 4 ; reads five elements behind the current index
+  %2 = add nsw i64 %indvars.iv2, 2
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %2
+  %3 = load float, float* %arrayidx2, align 4 ; reads two elements ahead of the current index
+  %mul = fmul float %1, %3
+  %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv2
+  store float %mul, float* %arrayidx4, align 4 ; writes the current index of the same array
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
+  %cmp2 = icmp sgt i64 %indvars.iv.next, %size
+  br i1 %cmp2, label %for.end, label %for.body ; exit once the incremented index exceeds %size (signed)
+
+for.end:                                          ; preds = %entry, %for.body
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/vector-geps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/vector-geps.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/vector-geps.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/vector-geps.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @vector_gep_stored(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* %b, <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32** [[TMP2]] to <4 x i32*>*
+; CHECK-NEXT:    store <4 x i32*> [[TMP1]], <4 x i32*>* [[TMP3]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define void @vector_gep_stored(i32** %a, i32 *%b, i64 %n) { ; stores the address of b[i] into a[i]
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; index, starts at 0
+  %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i ; address that depends on the induction variable
+  %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
+  store i32* %tmp0, i32** %tmp1, align 8 ; the GEP result itself is the stored value
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end ; continue while the incremented index is below %n (signed)
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @uniform_vector_gep_stored(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* %b, i64 1
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32*> undef, i32* [[TMP1]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32*> [[DOTSPLATINSERT]], <4 x i32*> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32** [[TMP2]] to <4 x i32*>*
+; CHECK-NEXT:    store <4 x i32*> [[DOTSPLAT]], <4 x i32*>* [[TMP3]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define void @uniform_vector_gep_stored(i32** %a, i32 *%b, i64 %n) { ; stores the address of b[1] into every a[i]
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; index, starts at 0
+  %tmp0 = getelementptr inbounds i32, i32* %b, i64 1 ; constant index: the same address on every iteration
+  %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
+  store i32* %tmp0, i32** %tmp1, align 8 ; the GEP result itself is the stored value
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end ; continue while the incremented index is below %n (signed)
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/vectorize-once.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/vectorize-once.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/vectorize-once.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/vectorize-once.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,76 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;
+; We want to make sure that we are vectorizing the scalar loop only once
+; even if the pass manager runs the vectorizer multiple times due to inlining.
+
+
+; This test checks that we add metadata to vectorized loops
+; CHECK-LABEL: @_Z4foo1Pii(
+; CHECK: <4 x i32>
+; CHECK: llvm.loop
+; CHECK: ret
+
+; This test comes from the loop:
+;
+;int foo (int *A, int n) {
+;  return std::accumulate(A, A + n, 0);
+;}
+define i32 @_Z4foo1Pii(i32* %A, i32 %n) #0 { ; sums the i32 values from %A up to %A + %n; returns 0 when %n is 0
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32, i32* %A, i64 %idx.ext ; one-past-the-end pointer used as the loop bound
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] ; running sum, starts at 0
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] ; pointer induction, starts at %A
+  %0 = load i32, i32* %__first.addr.04.i, align 4
+  %add.i = add nsw i32 %0, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32, i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i ; no loop metadata on this back-edge in the input
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] ; 0 for the empty range, otherwise the final sum
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+; This test checks that we don't vectorize loops that are marked with the "width" == 1 metadata.
+; CHECK-LABEL: @_Z4foo2Pii(
+; CHECK-NOT: <4 x i32>
+; CHECK: llvm.loop
+; CHECK: ret
+define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 { ; same summation as @_Z4foo1Pii, but the back-edge is tagged with loop metadata !0
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32, i32* %A, i64 %idx.ext ; one-past-the-end pointer used as the loop bound
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] ; running sum, starts at 0
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] ; pointer induction, starts at %A
+  %0 = load i32, i32* %__first.addr.04.i, align 4
+  %add.i = add nsw i32 %0, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32, i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.loop !0
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] ; 0 for the empty range, otherwise the final sum
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+
+; CHECK: !0 = distinct !{!0, !1}
+; CHECK: !1 = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: !2 = distinct !{!2, !3, !1}
+; CHECK: !3 = !{!"llvm.loop.unroll.runtime.disable"}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.width", i32 1}

Added: llvm/trunk/test/Transforms/LoopVectorize/version-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/version-mem-access.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/version-mem-access.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/version-mem-access.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,94 @@
+; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that we version this loop with speculating the value 1 for symbolic
+; strides.  This also checks that the symbolic stride information is correctly
+; propagated to the memcheck generation.  Without this the loop wouldn't
+; vectorize because we couldn't determine the array bounds for the required
+; memchecks.
+
+; CHECK-LABEL: test
+define void @test(i32*  %A, i64 %AStride,
+                  i32*  %B, i32 %BStride,
+                  i32*  %C, i64 %CStride, i32 %N) { ; A[i*AStride] = C[i*CStride] * B[i*BStride]; all three strides are function arguments
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader ; nothing to do when %N is 0
+
+; CHECK-DAG: icmp ne i64 %AStride, 1
+; CHECK-DAG: icmp ne i32 %BStride, 1
+; CHECK-DAG: icmp ne i64 %CStride, 1
+; CHECK: or
+; CHECK: or
+; CHECK: br
+
+; CHECK: vector.body
+; CHECK: load <2 x i32>
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; 64-bit index, starts at 0
+  %iv.trunc = trunc i64 %indvars.iv to i32
+  %mul = mul i32 %iv.trunc, %BStride ; the B index is computed in 32 bits and then zero-extended
+  %mul64 = zext i32 %mul to i64
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %mul64
+  %0 = load i32, i32* %arrayidx, align 4
+  %mul2 = mul nsw i64 %indvars.iv, %CStride ; the C index is computed directly in 64 bits
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i64 %mul2
+  %1 = load i32, i32* %arrayidx3, align 4
+  %mul4 = mul nsw i32 %1, %0
+  %mul3 = mul nsw i64 %indvars.iv, %AStride ; the A index is computed directly in 64 bits
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %mul3
+  store i32 %mul4, i32* %arrayidx7, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N ; the exit test compares the truncated index with %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; We used to crash on this function because we removed the fptosi cast when
+; replacing the symbolic stride '%conv'.
+; PR18480
+
+; CHECK-LABEL: fn1
+; CHECK: load <2 x double>
+
+define void @fn1(double* noalias %x, double* noalias %c, double %a) { ; c[i] = x[i * conv], where the stride conv is %a converted to i32
+entry:
+  %conv = fptosi double %a to i32 ; the stride comes from a floating-point-to-integer cast
+  %conv2 = add i32 %conv, 4 ; loop bound is the stride plus 4
+  %cmp8 = icmp sgt i32 %conv2, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.end ; the loop is skipped unless the bound is positive
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; 64-bit index, starts at 0
+  %0 = trunc i64 %indvars.iv to i32
+  %mul = mul nsw i32 %0, %conv ; strided index computed in 32 bits
+  %idxprom = sext i32 %mul to i64
+  %arrayidx = getelementptr inbounds double, double* %x, i64 %idxprom
+  %1 = load double, double* %arrayidx, align 8
+  %arrayidx3 = getelementptr inbounds double, double* %c, i64 %indvars.iv
+  store double %1, double* %arrayidx3, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %conv2
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; REQUIRES: asserts
+; RUN: opt < %s  -S -loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -debug-only=loop-vectorize -disable-output 2>&1  | FileCheck %s
+
+; This test checks that, when stress testing VPlan, if the computed VF
+; is 1, we override it to VF = 4.
+
+; CHECK: LV: VPlan computed VF 1.
+; CHECK: LV: VPlan stress testing: overriding computed VF.
+; CHECK: LV: Using VF 4 to build VPlans.
+@arr2 = external global [8 x i32], align 16
+@arr = external global [8 x [8 x i32]], align 16
+
+; Function Attrs: norecurse nounwind uwtable
+define void @foo(i32 %n) { ; 8x8 loop nest: the outer loop writes @arr2[i] = i, the inner loop writes @arr[j][i] = i + %n
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc8, %entry
+  %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ] ; outer index, starts at 0
+  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21
+  %0 = trunc i64 %indvars.iv21 to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %1 = trunc i64 %indvars.iv21 to i32
+  %add = add nsw i32 %1, %n ; value stored by the inner loop; computed once per outer iteration
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ] ; inner index, starts at 0
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21 ; row is the inner index, column is the outer index
+  store i32 %add, i32* %arrayidx7, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8:                                         ; preds = %for.body3
+  %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+  %exitcond23 = icmp eq i64 %indvars.iv.next22, 8
+  br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1 ; the loop metadata sits on the outer loop's back-edge
+
+for.end10:                                        ; preds = %for.inc8
+  ret void
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}

Added: llvm/trunk/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,51 @@
+; RUN: opt < %s -loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -vplan-verify-hcfg -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s -check-prefix=VERIFIER
+; RUN: opt < %s -loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s -check-prefix=NO-VERIFIER -allow-empty
+; REQUIRES: asserts
+
+; Verify that the stress testing flag for the VPlan H-CFG builder works as
+; expected with and without enabling the VPlan H-CFG Verifier.
+
+; VERIFIER: Verifying VPlan H-CFG.
+; NO-VERIFIER-NOT: Verifying VPlan H-CFG.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) { ; loop nest copying b[i*M + j] to a[i*M + j], outer index i and inner index j
+entry:
+  %cmp32 = icmp sgt i32 %N, 0
+  br i1 %cmp32, label %outer.ph, label %for.end15 ; the whole nest is skipped unless %N > 0
+
+outer.ph:
+  %cmp230 = icmp sgt i32 %M, 0 ; inner-loop guard, evaluated once before the outer loop
+  %0 = sext i32 %M to i64 ; sign-extended %M is the row stride
+  %wide.trip.count = zext i32 %M to i64 ; inner trip count
+  %wide.trip.count38 = zext i32 %N to i64 ; outer trip count
+  br label %outer.body
+
+outer.body:
+  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ] ; outer index, starts at 0
+  br i1 %cmp230, label %inner.ph, label %outer.inc ; the inner loop is bypassed when %M <= 0
+
+inner.ph:
+  %1 = mul nsw i64 %indvars.iv35, %0 ; row offset: outer index times the row stride
+  br label %inner.body
+
+inner.body:
+  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ] ; inner index, starts at 0
+  %2 = add nsw i64 %indvars.iv, %1 ; flattened element index
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
+  %3 = load i32, i32* %arrayidx, align 4
+  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
+  store i32 %3, i32* %arrayidx12, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %outer.inc, label %inner.body
+
+outer.inc:
+  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
+  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
+  br i1 %exitcond39, label %for.end15, label %outer.body
+
+for.end15:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/write-only.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/write-only.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/write-only.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/write-only.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;CHECK-LABEL: @read_mod_write_single_ptr(
+;CHECK: load <4 x float>
+;CHECK: ret i32
+define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp { ; multiplies each a[i] by 3.0 in place; the entry block is unnamed (%0)
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge ; the loop is skipped unless %n > 0
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] ; index, starts at 0
+  %2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  %3 = load float, float* %2, align 4
+  %4 = fmul float %3, 3.000000e+00
+  store float %4, float* %2, align 4 ; the load and the store go through the same pointer
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n ; the exit test compares the truncated index with %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}

Added: llvm/trunk/test/Transforms/LoopVectorize/zero-sized-pointee-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/zero-sized-pointee-crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/zero-sized-pointee-crash.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/zero-sized-pointee-crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @fn1
+define void @fn1() {
+entry-block:
+  br label %middle
+
+middle:
+  %0 = phi {}* [ %3, %middle ], [ inttoptr (i64 0 to {}*), %entry-block ]
+  %1 = bitcast {}* %0 to i8*
+  %2 = getelementptr i8, i8* %1, i64 1
+  %3 = bitcast i8* %2 to {}*
+  %4 = icmp eq i8* %2, undef
+  br i1 %4, label %exit, label %middle
+
+; CHECK:      %[[phi:.*]] = phi {}* [ %3, %middle ], [ null, %entry-block ]
+; CHECK-NEXT: %[[bc1:.*]] = bitcast {}* %[[phi]] to i8*
+; CHECK-NEXT: %[[gep:.*]] = getelementptr i8, i8* %[[bc1]], i64 1
+; CHECK-NEXT: %[[bc2:.*]] = bitcast i8* %[[gep]] to {}*
+; CHECK-NEXT: %[[cmp:.*]] = icmp eq i8* %[[gep]], undef
+; CHECK-NEXT: br i1 %[[cmp]],
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVersioning/add-phi-update-users.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioning/add-phi-update-users.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioning/add-phi-update-users.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioning/add-phi-update-users.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,65 @@
+; RUN: opt < %s -loop-versioning -S -o - | FileCheck %s
+
+; This test case used to end like this:
+;
+;    Instruction does not dominate all uses!
+;      %t2 = load i16, i16* @b, align 1, !tbaa !2, !alias.scope !6
+;      %tobool = icmp eq i16 %t2, 0
+;    LLVM ERROR: Broken function found, compilation aborted!
+;
+; due to a fault where we did not replace the use of %t2 in the icmp in
+; for.end, when adding a new PHI node for the versioned loops based on the
+; loop-defined values used outside of the loop.
+;
+; Verify that the code compiles, that we get a versioned loop, and that the
+; uses of %t2 in for.end and if.then are updated to use the value from the
+; added phi node.
+
+; CHECK:       define void @f1
+; CHECK:       for.end:
+; CHECK-NEXT:    %t2.lver = phi i16 [ %t2, %for.body ], [ %t2.lver.orig, %for.body.lver.orig ]
+; CHECK-NEXT:    %tobool = icmp eq i16 %t2.lver, 0
+; CHECK:       if.then:
+; CHECK-NEXT:    store i16 %t2.lver
+
+@a = dso_local global i16 0, align 1
+@b = dso_local global i16 0, align 1
+@c = dso_local global i16* null, align 1
+
+define void @f1() {
+entry:
+  %t0 = load i16*, i16** @c, align 1
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.backedge, %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond, %for.body
+  %t1 = phi i64 [ 0, %for.cond ], [ %inc, %for.body ]
+  %t2 = load i16, i16* @b, align 1, !tbaa !2
+  store i16 %t2, i16* %t0, align 1, !tbaa !2
+  %inc = add nuw nsw i64 %t1, 1
+  %cmp = icmp ult i64 %inc, 3
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %tobool = icmp eq i16 %t2, 0
+  br i1 %tobool, label %for.cond.backedge, label %if.then
+
+for.cond.backedge:                                ; preds = %for.end, %if.then
+  br label %for.cond
+
+if.then:                                          ; preds = %for.end
+  store i16 %t2, i16* @a, align 1, !tbaa !2
+  br label %for.cond.backedge
+}
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 1}
+!1 = !{!"clang version 7.0.0"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"long long", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

Added: llvm/trunk/test/Transforms/LoopVersioning/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioning/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioning/basic.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioning/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Version this loop with overlap checks between a, c and b, c.
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+  br label %for.body
+
+; CHECK: for.body.lver.check:
+; CHECK:   icmp
+; CHECK:   icmp
+; CHECK:   icmp
+; CHECK:   icmp
+; CHECK-NOT: icmp
+; CHECK:   br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph
+
+; CHECK: for.body.ph.lver.orig:
+; CHECK: for.body.lver.orig:
+; CHECK:   br i1 %exitcond.lver.orig, label %for.end, label %for.body.lver.orig
+; CHECK: for.body.ph:
+; CHECK: for.body:
+; CHECK:   br i1 %exitcond, label %for.end, label %for.body
+; CHECK: for.end:
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulC = mul i32 %loadA, %loadB
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %add = add nuw nsw i64 %ind, 1
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; This test ensures loop versioning does not produce an invalid dominator tree
+; if the exit block of the loop (bb0) dominates the runtime check block
+; (bb1 will become the runtime check block).
+
+; RUN: opt -loop-distribute -enable-loop-distribute -verify-dom-info -S -o - %s > %t
+; RUN: opt -loop-simplify -loop-distribute -enable-loop-distribute -verify-dom-info -S -o - %s > %t
+; RUN: FileCheck --check-prefix CHECK-VERSIONING -input-file %t %s
+
+; RUN: opt -loop-versioning -verify-dom-info -S -o - %s > %t
+; RUN: opt -loop-simplify -loop-versioning -verify-dom-info -S -o - %s > %t
+; RUN: FileCheck --check-prefix CHECK-VERSIONING -input-file %t %s
+
+@c1 = external global i16
+
+define void @f(i16 %a) {
+  br label %bb0
+
+bb0:
+  br label %bb1
+
+bb1:
+  %tmp1 = load i16, i16* @c1
+  br label %bb2
+
+bb2:
+  %tmp2 = phi i16 [ %tmp1, %bb1 ], [ %tmp3, %bb2 ]
+  %tmp4 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 4
+  store i32 1, i32* %tmp4
+  %tmp5 = getelementptr inbounds [1 x i32], [1 x i32]* undef, i32 0, i32 9
+  store i32 0, i32* %tmp5
+  %tmp3 = add i16 %tmp2, 1
+  store i16 %tmp2, i16* @c1
+  %tmp6 = icmp sle i16 %tmp3, 0
+  br i1 %tmp6, label %bb2, label %bb0
+}
+
+; Simple check to make sure loop versioning happened.
+; CHECK-VERSIONING: bb2.lver.check:

Added: llvm/trunk/test/Transforms/LoopVersioning/incorrect-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioning/incorrect-phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioning/incorrect-phi.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioning/incorrect-phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,60 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s
+
+; Make sure all PHIs are properly updated in the exit block.  Based on
+; PR28037.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = external global [2 x [3 x [5 x i16]]]
+
+; CHECK-LABEL: @phi_with_undef
+define void @phi_with_undef() {
+bb6.lr.ph:                                        ; preds = %bb5.preheader
+  br label %bb6
+
+bb6:                                              ; preds = %bb6.lr.ph, %bb6
+  %_tmp1423 = phi i64 [ undef, %bb6.lr.ph ], [ %_tmp142, %bb6 ]
+  %_tmp123 = getelementptr [2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i16 0, i64 undef
+  %_tmp126 = getelementptr [3 x [5 x i16]], [3 x [5 x i16]]* %_tmp123, i16 0, i64 %_tmp1423
+  %_tmp129 = getelementptr [5 x i16], [5 x i16]* %_tmp126, i16 0, i64 undef
+  %_tmp130 = load i16, i16* %_tmp129
+  store i16 undef, i16* getelementptr ([2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i64 0, i64 undef, i64 undef, i64 undef)
+  %_tmp142 = add i64 %_tmp1423, 1
+  br i1 false, label %bb6, label %loop.exit
+
+loop.exit:                                ; preds = %bb6
+  %_tmp142.lcssa = phi i64 [ %_tmp142, %bb6 ]
+  %split = phi i16 [ undef, %bb6 ]
+; CHECK: %split = phi i16 [ undef, %bb6 ], [ undef, %bb6.lver.orig ]
+  br label %bb9
+
+bb9:                                              ; preds = %bb9.loopexit, %bb1
+  ret void
+}
+
+; CHECK-LABEL: @phi_with_non_loop_defined_value
+define void @phi_with_non_loop_defined_value() {
+bb6.lr.ph:                                        ; preds = %bb5.preheader
+  %t = add i16 1, 1
+  br label %bb6
+
+bb6:                                              ; preds = %bb6.lr.ph, %bb6
+  %_tmp1423 = phi i64 [ undef, %bb6.lr.ph ], [ %_tmp142, %bb6 ]
+  %_tmp123 = getelementptr [2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i16 0, i64 undef
+  %_tmp126 = getelementptr [3 x [5 x i16]], [3 x [5 x i16]]* %_tmp123, i16 0, i64 %_tmp1423
+  %_tmp129 = getelementptr [5 x i16], [5 x i16]* %_tmp126, i16 0, i64 undef
+  %_tmp130 = load i16, i16* %_tmp129
+  store i16 undef, i16* getelementptr ([2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i64 0, i64 undef, i64 undef, i64 undef)
+  %_tmp142 = add i64 %_tmp1423, 1
+  br i1 false, label %bb6, label %loop.exit
+
+loop.exit:                                ; preds = %bb6
+  %_tmp142.lcssa = phi i64 [ %_tmp142, %bb6 ]
+  %split = phi i16 [ %t, %bb6 ]
+; CHECK: %split = phi i16 [ %t, %bb6 ], [ %t, %bb6.lver.orig ]
+  br label %bb9
+
+bb9:                                              ; preds = %bb9.loopexit, %bb1
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVersioning/lcssa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioning/lcssa.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioning/lcssa.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioning/lcssa.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,72 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fill(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) {
+; CHECK: bb1.lver.check:
+; CHECK:   br i1 %memcheck.conflict, label %bb1.ph.lver.orig, label %bb1.ph
+bb1.ph:
+  %ls1.20.promoted = load i8*, i8** %ls1.20
+  %ls2.21.promoted = load i8*, i8** %ls2.21
+  br label %bb1
+
+bb1:
+  %_tmp302 = phi i8* [ %ls2.21.promoted, %bb1.ph ], [ %_tmp30, %bb1 ]
+  %_tmp281 = phi i8* [ %ls1.20.promoted, %bb1.ph ], [ %_tmp28, %bb1 ]
+  %_tmp14 = getelementptr i8, i8* %_tmp281, i16 -1
+  %_tmp15 = load i8, i8* %_tmp14
+  %add = add i8 %_tmp15, 1
+  store i8 %add, i8* %_tmp281
+  store i8 %add, i8* %_tmp302
+  %_tmp28 = getelementptr i8, i8* %_tmp281, i16 1
+  %_tmp30 = getelementptr i8, i8* %_tmp302, i16 1
+  br i1 false, label %bb1, label %bb3.loopexit
+
+bb3.loopexit:
+  %_tmp30.lcssa = phi i8* [ %_tmp30, %bb1 ]
+  %_tmp15.lcssa = phi i8 [ %_tmp15, %bb1 ]
+  %_tmp28.lcssa = phi i8* [ %_tmp28, %bb1 ]
+  store i8* %_tmp28.lcssa, i8** %ls1.20
+  store i8 %_tmp15.lcssa, i8* %cse3.22
+  store i8* %_tmp30.lcssa, i8** %ls2.21
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+define void @fill_no_null_opt(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) #0 {
+; CHECK-LABEL: fill_no_null_opt(
+; CHECK: bb1.lver.check:
+; CHECK: %lver.safe = or i1 %memcheck.conflict, %{{.*}}
+; CHECK:  br i1 %lver.safe, label %bb1.ph.lver.orig, label %bb1.ph
+bb1.ph:
+  %ls1.20.promoted = load i8*, i8** %ls1.20
+  %ls2.21.promoted = load i8*, i8** %ls2.21
+  br label %bb1
+
+bb1:
+  %_tmp302 = phi i8* [ %ls2.21.promoted, %bb1.ph ], [ %_tmp30, %bb1 ]
+  %_tmp281 = phi i8* [ %ls1.20.promoted, %bb1.ph ], [ %_tmp28, %bb1 ]
+  %_tmp14 = getelementptr i8, i8* %_tmp281, i16 -1
+  %_tmp15 = load i8, i8* %_tmp14
+  %add = add i8 %_tmp15, 1
+  store i8 %add, i8* %_tmp281
+  store i8 %add, i8* %_tmp302
+  %_tmp28 = getelementptr i8, i8* %_tmp281, i16 1
+  %_tmp30 = getelementptr i8, i8* %_tmp302, i16 1
+  br i1 false, label %bb1, label %bb3.loopexit
+
+bb3.loopexit:
+  %_tmp30.lcssa = phi i8* [ %_tmp30, %bb1 ]
+  %_tmp15.lcssa = phi i8 [ %_tmp15, %bb1 ]
+  %_tmp28.lcssa = phi i8* [ %_tmp28, %bb1 ]
+  store i8* %_tmp28.lcssa, i8** %ls1.20
+  store i8 %_tmp15.lcssa, i8* %cse3.22
+  store i8* %_tmp30.lcssa, i8** %ls2.21
+  br label %bb3
+
+bb3:
+  ret void
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }

Added: llvm/trunk/test/Transforms/LoopVersioning/loop-invariant-bound.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioning/loop-invariant-bound.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioning/loop-invariant-bound.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioning/loop-invariant-bound.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s
+; Checks that when introducing check, we don't accidentally introduce non-dominating instructions
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%Dual.212 = type { %Dual.213, %Partials.215 }
+%Dual.213 = type { double, %Partials.214 }
+%Partials.214 = type { [2 x double] }
+%Partials.215 = type { [2 x %Dual.213] }
+
+; Function Attrs: sspreq
+define void @"julia_axpy!_65480"(%Dual.212*, %Dual.212* %other) {
+top:
+  br label %if24
+
+; CHECK-NOT: %bc = bitcast i64* %v2.sroa.0.0..sroa_cast
+; CHECK: %bound0 = icmp ult i8* %[[x:[a-z0-9]+]], %[[y:[a-z0-9]+]]
+; CHECK-NOT: %bound1 = icmp ult i8* %[[y]], %[[x]]
+
+if24:                                             ; preds = %if24, %top
+  %"#temp#1.sroa.3.02" = phi i64 [ undef, %top ], [ %2, %if24 ]
+  %"#temp#1.sroa.0.01" = phi i64 [ undef, %top ], [ %1, %if24 ]
+  %1 = add i64 %"#temp#1.sroa.0.01", 1
+  %2 = add i64 %"#temp#1.sroa.3.02", 1
+  ; This pointer is loop invariant. LAA used to re-use it from memcheck, even though it didn't dominate.
+  %v2.sroa.0.0..sroa_cast = bitcast %Dual.212* %0 to i64*
+  %v2.sroa.0.0.copyload = load i64, i64* %v2.sroa.0.0..sroa_cast, align 1
+  %3 = add i64 %"#temp#1.sroa.0.01", -1
+  %4 = getelementptr inbounds %Dual.212, %Dual.212* %other, i64 0, i32 1, i32 0, i64 0, i32 1, i32 0, i64 0
+  %5 = bitcast double* %4 to i64*
+  store i64 undef, i64* %5, align 8
+  %notlhs27 = icmp eq i64 %2, undef
+  %notrhs28 = icmp eq i64 %1, undef
+  %6 = or i1 %notrhs28, %notlhs27
+  br i1 %6, label %L41.L335_crit_edge, label %if24
+
+L41.L335_crit_edge:                               ; preds = %if24
+  ret void
+}

Added: llvm/trunk/test/Transforms/LoopVersioning/noalias-version-twice.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioning/noalias-version-twice.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioning/noalias-version-twice.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioning/noalias-version-twice.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,107 @@
+; RUN: opt -basicaa -loop-distribute -enable-loop-distribute -loop-simplify -scoped-noalias \
+; RUN:     -loop-versioning -S < %s | FileCheck %s
+
+; Test the metadata generated when versioning an already versioned loop.  Here
+; we invoke loop distribution to perform the first round of versioning.  It
+; adds memchecks for accesses that can alias across the distribution boundary.
+; Then we further version the distributed loops to fully disambiguate accesses
+; within each.
+;
+; So as an example, we add noalias between C and A during the versioning
+; within loop distribution and then add noalias between C and D during the
+; second explicit versioning step:
+;
+;   for (i = 0; i < n; i++) {
+;     A[i + 1] = A[i] * B[i];
+; -------------------------------
+;     C[i] = D[i] * E[i];
+;   }
+
+; To see it easier what's going on, I expanded every noalias/scope metadata
+; reference below in a comment.  For a scope I use the format scope(domain),
+; e.g. scope 17 in domain 15 is written as 17(15).
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@B = common global i32* null, align 8
+@A = common global i32* null, align 8
+@C = common global i32* null, align 8
+@D = common global i32* null, align 8
+@E = common global i32* null, align 8
+
+define void @f() {
+entry:
+  %a = load i32*, i32** @A, align 8
+  %b = load i32*, i32** @B, align 8
+  %c = load i32*, i32** @C, align 8
+  %d = load i32*, i32** @D, align 8
+  %e = load i32*, i32** @E, align 8
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+
+; CHECK: %loadA.ldist1 = {{.*}} !noalias !25
+; A noalias C: !25 -> { 17(15), 18(15), 19(15), 26(24) }
+;                       ^^^^^^
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulA = mul i32 %loadB, %loadA
+
+  %add = add nuw nsw i64 %ind, 1
+  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+  store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+; CHECK: for.body:
+
+  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+
+; CHECK: %loadD = {{.*}} !alias.scope !31
+; D's scope: !31 -> { 18(15), 32(33) }
+;                             ^^^^^^
+  %loadD = load i32, i32* %arrayidxD, align 4
+
+  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+
+; CHECK: %loadE = {{.*}} !alias.scope !34
+; E's scope: !34 -> { 19(15), 35(33) }
+;                             ^^^^^^
+  %loadE = load i32, i32* %arrayidxE, align 4
+
+  %mulC = mul i32 %loadD, %loadE
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+
+; CHECK: store i32 %mulC, {{.*}} !alias.scope !36, !noalias !38
+; C's scope: !36 -> { 17(15), 37(33) }
+;                     ^^^^^^
+; C noalias D and E: !38 -> { 21(15), 32(33), 35(33) }
+;                                     ^^^^^^  ^^^^^^
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Domain for the second loop versioning for the top loop after
+; distribution.
+; CHECK: !15 = distinct !{!15, !"LVerDomain"}
+; CHECK: !17 = distinct !{!17, !15}
+; CHECK: !25 = !{!17, !18, !19, !26}
+; CHECK: !31 = !{!18, !32}
+; CHECK: !32 = distinct !{!32, !33}
+; Domain for the second loop versioning for the bottom loop after
+; distribution.
+; CHECK: !33 = distinct !{!33, !"LVerDomain"}
+; CHECK: !34 = !{!19, !35}
+; CHECK: !35 = distinct !{!35, !33}
+; CHECK: !36 = !{!17, !37}
+; CHECK: !38 = !{!21, !32, !35}

Added: llvm/trunk/test/Transforms/LoopVersioning/noalias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioning/noalias.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioning/noalias.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioning/noalias.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+
+; A very simple case.  After versioning the %loadA and %loadB can't alias with
+; the store.
+;
+; To see it easier what's going on, I expanded every noalias/scope metadata
+; reference below in a comment.  For a scope I use the format scope(domain),
+; e.g. scope 17 in domain 15 is written as 17(15).
+
+; CHECK-LABEL: @f(
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+  br label %for.body
+
+; CHECK: for.body.lver.orig:
+; CHECK: for.body:
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+; CHECK: %loadA = {{.*}} !alias.scope !0
+; A's scope: !0 -> { 1(2) }
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+; CHECK: %loadB = {{.*}} !alias.scope !3
+; B's scope: !3 -> { 4(2) }
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulC = mul i32 %loadA, %loadB
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+; CHECK: store {{.*}} !alias.scope !5, !noalias !7
+; C noalias A and B: !7 -> { 1(2), 4(2) }
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %add = add nuw nsw i64 %ind, 1
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2}
+; CHECK: !2 = distinct !{!2, !"LVerDomain"}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !2}
+; CHECK: !7 = !{!1, !4}

Added: llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt < %s  -O1  -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; Test to confirm loop is a candidate for LoopVersioningLICM.
+; It also confirms invariant moved out of loop.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3<header><latch><exiting>
+; CHECK-NEXT:   Loop Versioning found to be beneficial
+;
+; CHECK: for.body3:
+; CHECK-NEXT: %[[induction:.*]] = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
+; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ]
+; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !2, !noalias !2
+; CHECK-NEXT: %add8 = add nsw i32 %[[induction]], %add
+; CHECK-NEXT: %inc = add nuw i32 %j.113, 1
+; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr
+; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit7, !llvm.loop !5
+define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
+entry:
+  %cmp14 = icmp eq i32 %itr, 0
+  br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc11
+  %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+  %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp212 = icmp ult i32 %j.016, %itr
+  br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %add = add i32 %i.015, %itr
+  %idxprom6 = zext i32 %i.015 to i64
+  %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
+  %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+  %idxprom = zext i32 %j.113 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+  store i32 %add, i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
+  %add8 = add nsw i32 %0, %add
+  store i32 %add8, i32* %arrayidx7, align 4
+  %inc = add nuw i32 %j.113, 1
+  %cmp2 = icmp ult i32 %inc, %itr
+  br i1 %cmp2, label %for.body3, label %for.inc11.loopexit
+
+for.inc11.loopexit:                               ; preds = %for.body3
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.inc11.loopexit, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ]
+  %inc12 = add nuw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc12, %itr
+  br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit
+
+for.end13.loopexit:                               ; preds = %for.inc11
+  br label %for.end13
+
+for.end13:                                        ; preds = %for.end13.loopexit, %entry
+  ret i32 0
+}
+

Added: llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; RUN: opt < %s  -O1  -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm -disable-loop-unrolling 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; Test to confirm loop is a good candidate for LoopVersioningLICM
+; It also confirms invariant moved out of loop.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3.us<header><latch><exiting>
+; CHECK-NEXT:     Loop Versioning found to be beneficial
+;
+; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit6:       ; preds = %for.body3.us
+; CHECK-NEXT: %add14.us.lcssa = phi float [ %add14.us, %for.body3.us ]
+; CHECK-NEXT: store float %add14.us.lcssa, float* %arrayidx.us, align 4, !alias.scope !0, !noalias !0
+; CHECK-NEXT: br label %for.cond1.for.inc17_crit_edge.us
+;
+define i32 @foo(float* nocapture %var2, float** nocapture readonly %var3, i32 %itr) #0 {
+entry:
+  %cmp38 = icmp sgt i32 %itr, 1
+  br i1 %cmp38, label %for.body3.lr.ph.us, label %for.end19
+
+for.body3.us:                                     ; preds = %for.body3.us, %for.body3.lr.ph.us
+  %0 = phi float [ %.pre, %for.body3.lr.ph.us ], [ %add14.us, %for.body3.us ]
+  %indvars.iv = phi i64 [ 1, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ]
+  %1 = trunc i64 %indvars.iv to i32
+  %conv.us = sitofp i32 %1 to float
+  %add.us = fadd float %conv.us, %0
+  %arrayidx7.us = getelementptr inbounds float, float* %3, i64 %indvars.iv
+  store float %add.us, float* %arrayidx7.us, align 4
+  %2 = load float, float* %arrayidx.us, align 4
+  %add14.us = fadd float %2, %add.us
+  store float %add14.us, float* %arrayidx.us, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %itr
+  br i1 %exitcond, label %for.cond1.for.inc17_crit_edge.us, label %for.body3.us
+
+for.body3.lr.ph.us:                               ; preds = %entry, %for.cond1.for.inc17_crit_edge.us
+  %indvars.iv40 = phi i64 [ %indvars.iv.next41, %for.cond1.for.inc17_crit_edge.us ], [ 1, %entry ]
+  %arrayidx.us = getelementptr inbounds float, float* %var2, i64 %indvars.iv40
+  %arrayidx6.us = getelementptr inbounds float*, float** %var3, i64 %indvars.iv40
+  %3 = load float*, float** %arrayidx6.us, align 8
+  %.pre = load float, float* %arrayidx.us, align 4
+  br label %for.body3.us
+
+for.cond1.for.inc17_crit_edge.us:                 ; preds = %for.body3.us
+  %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1
+  %lftr.wideiv42 = trunc i64 %indvars.iv.next41 to i32
+  %exitcond43 = icmp eq i32 %lftr.wideiv42, %itr
+  br i1 %exitcond43, label %for.end19, label %for.body3.lr.ph.us
+
+for.end19:                                        ; preds = %for.cond1.for.inc17_crit_edge.us, %entry
+  ret i32 0
+}

Added: llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; RUN: opt < %s  -O1  -S -loop-versioning-licm -debug-only=loop-versioning-licm  2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; Test to confirm loop is not a candidate for LoopVersioningLICM.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3<header><latch><exiting>
+; CHECK-NEXT:    LAA: Runtime check not found !!
+; CHECK-NEXT:    Loop instructions not suitable for LoopVersioningLICM
+
+define i32 @foo(i32* nocapture %var1, i32 %itr) #0 {
+entry:
+  %cmp18 = icmp eq i32 %itr, 0
+  br i1 %cmp18, label %for.end8, label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc6
+  %j.020 = phi i32 [ %j.1.lcssa, %for.inc6 ], [ 0, %entry ]
+  %i.019 = phi i32 [ %inc7, %for.inc6 ], [ 0, %entry ]
+  %cmp216 = icmp ult i32 %j.020, %itr
+  br i1 %cmp216, label %for.body3.lr.ph, label %for.inc6
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %0 = zext i32 %j.020 to i64
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %1, %itr
+  store i32 %add, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %itr
+  br i1 %exitcond, label %for.inc6, label %for.body3
+
+for.inc6:                                         ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.020, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %inc7 = add nuw i32 %i.019, 1
+  %exitcond21 = icmp eq i32 %inc7, %itr
+  br i1 %exitcond21, label %for.end8, label %for.cond1.preheader
+
+for.end8:                                         ; preds = %for.inc6, %entry
+  ret i32 0
+}
+

Added: llvm/trunk/test/Transforms/LoopVersioningLICM/metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVersioningLICM/metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVersioningLICM/metadata.ll (added)
+++ llvm/trunk/test/Transforms/LoopVersioningLICM/metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,104 @@
+; RUN: opt < %s  -O1  -S -loop-versioning-licm -licm 2>&1 | FileCheck %s
+
+; CHECK-LABEL: @without_metadata(
+define i32 @without_metadata(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
+entry:
+  %cmp14 = icmp eq i32 %itr, 0                    ; %itr == 0: nothing to do
+  br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc11
+  %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+  %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp212 = icmp ult i32 %j.016, %itr
+  br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %add = add i32 %i.015, %itr
+  %idxprom6 = zext i32 %i.015 to i64
+  %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6   ; indexed by the outer counter only, so fixed for the whole inner loop
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
+  %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+  %idxprom = zext i32 %j.113 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+; CHECK: store i32 %add, i32* %arrayidx, align 4, !alias.scope {{.*}}, !noalias {{.*}}
+  store i32 %add, i32* %arrayidx, align 4         ; expected to pick up alias.scope and noalias metadata in the output
+  %0 = load i32, i32* %arrayidx7, align 4         ; load and store below both use the inner-loop-invariant %var3 address
+  %add8 = add nsw i32 %0, %add
+  store i32 %add8, i32* %arrayidx7, align 4
+  %inc = add nuw i32 %j.113, 1
+  %cmp2 = icmp ult i32 %inc, %itr
+  br i1 %cmp2, label %for.body3, label %for.inc11.loopexit   ; no loop metadata on this back edge
+
+for.inc11.loopexit:                               ; preds = %for.body3
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.inc11.loopexit, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ]
+  %inc12 = add nuw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc12, %itr
+  br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit
+
+for.end13.loopexit:                               ; preds = %for.inc11
+  br label %for.end13
+
+for.end13:                                        ; preds = %for.end13.loopexit, %entry
+  ret i32 0
+}
+
+; CHECK-LABEL: @with_metadata(
+define i32 @with_metadata(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
+entry:
+  %cmp14 = icmp eq i32 %itr, 0                    ; %itr == 0: nothing to do
+  br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc11
+  %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+  %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp212 = icmp ult i32 %j.016, %itr
+  br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %add = add i32 %i.015, %itr
+  %idxprom6 = zext i32 %i.015 to i64
+  %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6   ; indexed by the outer counter only, so fixed for the whole inner loop
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
+  %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+  %idxprom = zext i32 %j.113 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+; CHECK-NOT: store i32 %add, i32* %arrayidx, align 4, !alias.scope {{.*}}, !noalias {{.*}}
+  store i32 %add, i32* %arrayidx, align 4         ; must stay free of alias.scope and noalias metadata in the output
+  %0 = load i32, i32* %arrayidx7, align 4
+  %add8 = add nsw i32 %0, %add
+  store i32 %add8, i32* %arrayidx7, align 4
+  %inc = add nuw i32 %j.113, 1
+  %cmp2 = icmp ult i32 %inc, %itr
+  br i1 %cmp2, label %for.body3, label %for.inc11.loopexit, !llvm.loop !0   ; !0 wraps llvm.loop.licm_versioning.disable (defined at the end of the file)
+
+for.inc11.loopexit:                               ; preds = %for.body3
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.inc11.loopexit, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ]
+  %inc12 = add nuw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc12, %itr
+  br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit
+
+for.end13.loopexit:                               ; preds = %for.inc11
+  br label %for.end13
+
+for.end13:                                        ; preds = %for.end13.loopexit, %entry
+  ret i32 0
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.licm_versioning.disable"}

Added: llvm/trunk/test/Transforms/LowerAtomic/atomic-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerAtomic/atomic-load.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerAtomic/atomic-load.ll (added)
+++ llvm/trunk/test/Transforms/LowerAtomic/atomic-load.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt < %s -loweratomic -S | FileCheck %s
+; RUN: opt < %s -passes=loweratomic -S | FileCheck %s
+
+define i8 @add() {
+; CHECK-LABEL: @add(
+  %i = alloca i8
+  %j = atomicrmw add i8* %i, i8 42 monotonic      ; expected to be rewritten as load, add, store on consecutive lines
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: add
+; CHECK-NEXT: store
+  ret i8 %j                                       ; the returned value must be the one produced by the load
+; CHECK: ret i8 [[INST]]
+}
+
+define i8 @nand() {
+; CHECK-LABEL: @nand(
+  %i = alloca i8
+  %j = atomicrmw nand i8* %i, i8 42 monotonic     ; expected to be rewritten as load, and, xor, store on consecutive lines
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: and
+; CHECK-NEXT: xor
+; CHECK-NEXT: store
+  ret i8 %j                                       ; the returned value must be the one produced by the load
+; CHECK: ret i8 [[INST]]
+}
+
+define i8 @min() {
+; CHECK-LABEL: @min(
+  %i = alloca i8
+  %j = atomicrmw min i8* %i, i8 42 monotonic      ; expected to be rewritten as load, icmp, select, store on consecutive lines
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: icmp
+; CHECK-NEXT: select
+; CHECK-NEXT: store
+  ret i8 %j                                       ; the returned value must be the one produced by the load
+; CHECK: ret i8 [[INST]]
+}

Added: llvm/trunk/test/Transforms/LowerAtomic/atomic-swap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerAtomic/atomic-swap.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerAtomic/atomic-swap.ll (added)
+++ llvm/trunk/test/Transforms/LowerAtomic/atomic-swap.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt < %s -loweratomic -S | FileCheck %s
+
+define i8 @cmpswap() {
+; CHECK-LABEL: @cmpswap(
+  %i = alloca i8
+  %pair = cmpxchg i8* %i, i8 0, i8 42 monotonic monotonic   ; compare against 0 and store 42 on a match
+  %j = extractvalue { i8, i1 } %pair, 0           ; field 0 of the result pair is what the function returns
+; CHECK: [[OLDVAL:%[a-z0-9]+]] = load i8, i8* [[ADDR:%[a-z0-9]+]]
+; CHECK-NEXT: [[SAME:%[a-z0-9]+]] = icmp eq i8 [[OLDVAL]], 0
+; CHECK-NEXT: [[TO_STORE:%[a-z0-9]+]] = select i1 [[SAME]], i8 42, i8 [[OLDVAL]]
+; CHECK-NEXT: store i8 [[TO_STORE]], i8* [[ADDR]]
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = insertvalue { i8, i1 } undef, i8 [[OLDVAL]], 0
+; CHECK-NEXT: [[RES:%[a-z0-9]+]] = insertvalue { i8, i1 } [[TMP]], i1 [[SAME]], 1
+; CHECK-NEXT: [[VAL:%[a-z0-9]+]] = extractvalue { i8, i1 } [[RES]], 0
+  ret i8 %j                                       ; must return the value extracted from the rebuilt pair
+; CHECK: ret i8 [[VAL]]
+}
+
+
+define i8 @swap() {
+; CHECK-LABEL: @swap(
+  %i = alloca i8
+  %j = atomicrmw xchg i8* %i, i8 42 monotonic     ; expected to be rewritten as a load immediately followed by a store
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: store
+  ret i8 %j                                       ; the returned value must be the one produced by the load
+; CHECK: ret i8 [[INST]]
+}
+
+
+define i8 @swap_optnone() noinline optnone {     ; same body as @swap, but the function is marked noinline optnone
+; CHECK-LABEL: @swap_optnone(
+  %i = alloca i8
+  %j = atomicrmw xchg i8* %i, i8 42 monotonic     ; the load/store rewrite is still expected despite optnone
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: store
+  ret i8 %j                                       ; the returned value must be the one produced by the load
+; CHECK: ret i8 [[INST]]
+}

Added: llvm/trunk/test/Transforms/LowerAtomic/barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerAtomic/barrier.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerAtomic/barrier.ll (added)
+++ llvm/trunk/test/Transforms/LowerAtomic/barrier.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,8 @@
+; RUN: opt < %s -loweratomic -S | FileCheck %s
+
+define void @barrier() {
+; CHECK-LABEL: @barrier(
+  fence seq_cst                                   ; expected to vanish: the ret must directly follow the function header line
+; CHECK-NEXT: ret
+  ret void
+}

Added: llvm/trunk/test/Transforms/LowerExpectIntrinsic/PR33346.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerExpectIntrinsic/PR33346.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerExpectIntrinsic/PR33346.ll (added)
+++ llvm/trunk/test/Transforms/LowerExpectIntrinsic/PR33346.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -lower-expect -S < %s
+; RUN: opt -passes='function(lower-expect)' -S < %s
+
+define i64 @foo(i64 %arg) #0 {
+bb:
+  %tmp = alloca i64, align 8
+  store i64 %arg, i64* %tmp, align 8
+  %tmp1 = load i64, i64* %tmp, align 8
+  %tmp2 = load i64, i64* %tmp, align 8
+  %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 %tmp2)   ; the expected-value operand is a loaded value, not a constant
+  ret i64 %tmp3                                   ; the run lines do not pipe to FileCheck, so only a clean opt run is verified
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.expect.i64(i64, i64)
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304723)"}

Added: llvm/trunk/test/Transforms/LowerExpectIntrinsic/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerExpectIntrinsic/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerExpectIntrinsic/basic.ll (added)
+++ llvm/trunk/test/Transforms/LowerExpectIntrinsic/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,291 @@
+; RUN: opt -lower-expect -strip-dead-prototypes -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect),strip-dead-prototypes' < %s | FileCheck %s
+
+; CHECK-LABEL: @test1(
+define i32 @test1(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %cmp = icmp sgt i32 %tmp, 1
+  %conv = zext i1 %cmp to i32
+  %conv1 = sext i32 %conv to i64
+  %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1)   ; (x > 1), widened to i64, is expected to be 1
+  %tobool = icmp ne i64 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+  br i1 %tobool, label %if.then, label %if.end    ; should carry weights !0 (2000, 1) with the intrinsic call gone
+
+if.then:                                          ; preds = %entry
+  %call = call i32 (...) @f()
+  store i32 %call, i32* %retval
+  br label %return
+
+if.end:                                           ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %0 = load i32, i32* %retval
+  ret i32 %0
+}
+
+declare i64 @llvm.expect.i64(i64, i64) nounwind readnone
+
+declare i32 @f(...)
+
+; CHECK-LABEL: @test2(
+define i32 @test2(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %conv = sext i32 %tmp to i64
+  %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)   ; x itself (sign-extended) is expected to be 1, no comparison first
+  %tobool = icmp ne i64 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+  br i1 %tobool, label %if.then, label %if.end    ; should carry weights !0 (2000, 1) with the intrinsic call gone
+
+if.then:                                          ; preds = %entry
+  %call = call i32 (...) @f()
+  store i32 %call, i32* %retval
+  br label %return
+
+if.end:                                           ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %0 = load i32, i32* %retval
+  ret i32 %0
+}
+
+; CHECK-LABEL: @test3(
+define i32 @test3(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %tobool = icmp ne i32 %tmp, 0
+  %lnot = xor i1 %tobool, true                    ; single logical negation of (x != 0)
+  %lnot.ext = zext i1 %lnot to i32
+  %conv = sext i32 %lnot.ext to i64
+  %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)   ; the negated condition is expected to be 1
+  %tobool1 = icmp ne i64 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+  br i1 %tobool1, label %if.then, label %if.end   ; should carry weights !0 (2000, 1) with the intrinsic call gone
+
+if.then:                                          ; preds = %entry
+  %call = call i32 (...) @f()
+  store i32 %call, i32* %retval
+  br label %return
+
+if.end:                                           ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %0 = load i32, i32* %retval
+  ret i32 %0
+}
+
+; CHECK-LABEL: @test4(
+define i32 @test4(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %tobool = icmp ne i32 %tmp, 0
+  %lnot = xor i1 %tobool, true
+  %lnot1 = xor i1 %lnot, true                     ; second negation: %lnot1 has the same truth value as %tobool
+  %lnot.ext = zext i1 %lnot1 to i32
+  %conv = sext i32 %lnot.ext to i64
+  %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)   ; the doubly negated condition is expected to be 1
+  %tobool2 = icmp ne i64 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+  br i1 %tobool2, label %if.then, label %if.end   ; should carry weights !0 (2000, 1) with the intrinsic call gone
+
+if.then:                                          ; preds = %entry
+  %call = call i32 (...) @f()
+  store i32 %call, i32* %retval
+  br label %return
+
+if.end:                                           ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %0 = load i32, i32* %retval
+  ret i32 %0
+}
+
+; CHECK-LABEL: @test5(
+define i32 @test5(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %cmp = icmp slt i32 %tmp, 0
+  %conv = zext i1 %cmp to i32
+  %conv1 = sext i32 %conv to i64
+  %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 0)   ; (x < 0) is expected to be 0, i.e. false
+  %tobool = icmp ne i64 %expval, 0
+; CHECK: !prof !1
+; CHECK-NOT: @llvm.expect
+  br i1 %tobool, label %if.then, label %if.end    ; should carry the reversed weights !1 (1, 2000)
+
+if.then:                                          ; preds = %entry
+  %call = call i32 (...) @f()
+  store i32 %call, i32* %retval
+  br label %return
+
+if.end:                                           ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %0 = load i32, i32* %retval
+  ret i32 %0
+}
+
+; CHECK-LABEL: @test6(
+define i32 @test6(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %conv = sext i32 %tmp to i64
+  %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)   ; expected value 1 is one of the switch cases below
+; CHECK: !prof !2
+; CHECK-NOT: @llvm.expect
+  switch i64 %expval, label %sw.epilog [
+    i64 1, label %sw.bb
+    i64 2, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %entry, %entry
+  store i32 0, i32* %retval
+  br label %return
+
+sw.epilog:                                        ; preds = %entry
+  store i32 1, i32* %retval                       ; NOTE(review): !2 is (1, 2000, 1); presumably default first, then cases in order - confirm
+  br label %return
+
+return:                                           ; preds = %sw.epilog, %sw.bb
+  %0 = load i32, i32* %retval
+  ret i32 %0
+}
+
+; CHECK-LABEL: @test7(
+define i32 @test7(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %conv = sext i32 %tmp to i64
+  %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)   ; expected value 1 is not among the switch cases (2 and 3)
+; CHECK: !prof !3
+; CHECK-NOT: @llvm.expect
+  switch i64 %expval, label %sw.epilog [
+    i64 2, label %sw.bb
+    i64 3, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %entry, %entry
+  %tmp1 = load i32, i32* %x.addr, align 4
+  store i32 %tmp1, i32* %retval
+  br label %return
+
+sw.epilog:                                        ; preds = %entry
+  store i32 0, i32* %retval                       ; NOTE(review): !3 is (2000, 1, 1); presumably the heavy first weight is the default - confirm
+  br label %return
+
+return:                                           ; preds = %sw.epilog, %sw.bb
+  %0 = load i32, i32* %retval
+  ret i32 %0
+}
+
+; CHECK-LABEL: @test8(
+define i32 @test8(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %cmp = icmp sgt i32 %tmp, 1
+  %conv = zext i1 %cmp to i32
+  %expval = call i32 @llvm.expect.i32(i32 %conv, i32 1)   ; same shape as test1 but through the i32 flavour of the intrinsic
+  %tobool = icmp ne i32 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+  br i1 %tobool, label %if.then, label %if.end    ; should carry weights !0 (2000, 1) with the intrinsic call gone
+
+if.then:                                          ; preds = %entry
+  %call = call i32 (...) @f()
+  store i32 %call, i32* %retval
+  br label %return
+
+if.end:                                           ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %0 = load i32, i32* %retval
+  ret i32 %0
+}
+
+declare i32 @llvm.expect.i32(i32, i32) nounwind readnone
+
+; CHECK-LABEL: @test9(
+define i32 @test9(i32 %x) nounwind uwtable ssp {
+entry:
+  %retval = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %cmp = icmp sgt i32 %tmp, 1
+  %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1)   ; i1 flavour: the comparison result goes in without any widening
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+  br i1 %expval, label %if.then, label %if.end    ; the intrinsic result is the branch condition itself (no icmp in between)
+
+if.then:                                          ; preds = %entry
+  %call = call i32 (...) @f()
+  store i32 %call, i32* %retval
+  br label %return
+
+if.end:                                           ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %0 = load i32, i32* %retval
+  ret i32 %0
+}
+
+; CHECK-LABEL: @test10(
+define i32 @test10(i64 %t6) {
+  %t7 = call i64 @llvm.expect.i64(i64 %t6, i64 0)   ; the argument is expected to be 0
+  %t8 = icmp ne i64 %t7, 0
+  %t9 = select i1 %t8, i32 1, i32 2               ; consumer is a select, not a branch; it should receive weights !1 (1, 2000)
+; CHECK: select{{.*}}, !prof !1
+  ret i32 %t9
+}
+
+
+declare i1 @llvm.expect.i1(i1, i1) nounwind readnone
+
+; CHECK: !0 = !{!"branch_weights", i32 2000, i32 1}
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 2000}
+; CHECK: !2 = !{!"branch_weights", i32 1, i32 2000, i32 1}
+; CHECK: !3 = !{!"branch_weights", i32 2000, i32 1, i32 1}

Added: llvm/trunk/test/Transforms/LowerExpectIntrinsic/expect_nonboolean.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerExpectIntrinsic/expect_nonboolean.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerExpectIntrinsic/expect_nonboolean.ll (added)
+++ llvm/trunk/test/Transforms/LowerExpectIntrinsic/expect_nonboolean.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,104 @@
+; RUN: opt -lower-expect  -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s
+
+define i32 @foo(i32 %arg) #0 {
+; CHECK-LABEL: @foo(i32{{.*}})
+bb:
+  %tmp = sext i32 %arg to i64
+  %tmp1 = call i64 @llvm.expect.i64(i64 %tmp, i64 4)   ; expected value is 4, a non-boolean constant
+  %tmp2 = icmp ne i64 %tmp1, 0                    ; 4 != 0 holds for the expected value, so the likely weights are required
+  br i1 %tmp2, label %bb3, label %bb5
+; CHECK: br i1 %tmp2{{.*}}!prof [[LIKELY:![0-9]+]]
+
+bb3:                                              ; preds = %bb
+  %tmp4 = call i32 (...) @bar()
+  br label %bb5
+
+bb5:                                              ; preds = %bb3, %bb
+  ret i32 1
+}
+
+define i32 @foo2(i32 %arg) #0 {
+; CHECK-LABEL: @foo2
+bb:
+  %tmp = sext i32 %arg to i64
+  %tmp1 = call i64 @llvm.expect.i64(i64 %tmp, i64 4)   ; expected value is 4
+  %tmp2 = icmp eq i64 %tmp1, 2                    ; 4 == 2 is false for the expected value, so the unlikely weights are required
+  br i1 %tmp2, label %bb3, label %bb5
+; CHECK: br i1 %tmp2{{.*}}!prof [[UNLIKELY:![0-9]+]]
+
+bb3:                                              ; preds = %bb
+  %tmp4 = call i32 (...) @bar()
+  br label %bb5
+
+bb5:                                              ; preds = %bb3, %bb
+  ret i32 1
+}
+
+define i32 @foo3(i32 %arg) #0 {
+; CHECK-LABEL: @foo3
+bb:
+  %tmp = sext i32 %arg to i64
+  %tmp1 = call i64 @llvm.expect.i64(i64 %tmp, i64 4)   ; expected value is 4
+  %tmp2 = icmp eq i64 %tmp1, 4                    ; 4 == 4 holds for the expected value, so the likely weights are required
+  br i1 %tmp2, label %bb3, label %bb5
+; CHECK: br i1 %tmp2{{.*}}!prof [[LIKELY]]
+
+bb3:                                              ; preds = %bb
+  %tmp4 = call i32 (...) @bar()
+  br label %bb5
+
+bb5:                                              ; preds = %bb3, %bb
+  ret i32 1
+}
+
+define i32 @foo4(i32 %arg) #0 {
+; CHECK-LABEL: @foo4
+bb:
+  %tmp = sext i32 %arg to i64
+  %tmp1 = call i64 @llvm.expect.i64(i64 %tmp, i64 4)   ; expected value is 4
+  %tmp2 = icmp ne i64 %tmp1, 2                    ; 4 != 2 holds for the expected value, so the likely weights are required
+  br i1 %tmp2, label %bb3, label %bb5
+; CHECK: br i1 %tmp2{{.*}}!prof [[LIKELY]]
+
+bb3:                                              ; preds = %bb
+  %tmp4 = call i32 (...) @bar()
+  br label %bb5
+
+bb5:                                              ; preds = %bb3, %bb
+  ret i32 1
+}
+
+define i32 @foo5(i32 %arg, i32 %arg1) #0 {
+; CHECK-LABEL: @foo5
+bb:
+  %tmp = sext i32 %arg1 to i64
+  %tmp2 = call i64 @llvm.expect.i64(i64 %tmp, i64 4)   ; expected value is 4
+  %tmp3 = sext i32 %arg to i64
+  %tmp4 = icmp ne i64 %tmp2, %tmp3                ; right-hand side is a run-time value, so the outcome for 4 is unknown
+  br i1 %tmp4, label %bb5, label %bb7             ; no profile metadata may be attached here
+; CHECK-NOT: !prof
+
+bb5:                                              ; preds = %bb
+  %tmp6 = call i32 (...) @bar()
+  br label %bb7
+
+bb7:                                              ; preds = %bb5, %bb
+  ret i32 1
+}
+
+declare i64 @llvm.expect.i64(i64, i64) #1
+
+declare i32 @bar(...) local_unnamed_addr #0
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304373)"}
+; CHECK: [[LIKELY]] = !{!"branch_weights", i32 2000, i32 1}
+; CHECK: [[UNLIKELY]] = !{!"branch_weights", i32 1, i32 2000}
+

Added: llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_merge.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_merge.ll (added)
+++ llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_merge.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,356 @@
+; RUN: opt -lower-expect  -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s
+
+; The C case
+; if (__builtin_expect((x > goo() && y > hoo() && z > too()), 1)) 
+; For the above case, all 3 branches should be annotated.
+;
+; if (__builtin_expect((x > goo() && y > hoo() && z > too()), 0)) 
+; For the above case, we don't have enough information, so
+; only the last branch is annotated.
+
+define void @foo(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo
+bb:
+  %tmp8 = call i32  @goo() 
+  %tmp9 = icmp sgt i32 %tmp8, %arg
+  br i1 %tmp9, label %bb10, label %bb18           ; first short-circuit test; jumps to the merge block when false
+; CHECK: !prof [[WEIGHT:![0-9]+]]
+
+bb10:                                             ; preds = %bb
+  %tmp12 = call i32  @hoo()
+  %tmp13 = icmp sgt i32 %arg1, %tmp12
+  br i1 %tmp13, label %bb14, label %bb18          ; second short-circuit test
+; CHECK: br i1 %tmp13, {{.*}}!prof [[WEIGHT]]
+
+bb14:                                             ; preds = %bb10
+  %tmp16 = call i32  @too()
+  %tmp17 = icmp sgt i32 %arg2, %tmp16
+  br label %bb18
+
+bb18:                                             ; preds = %bb14, %bb10, %bb
+  %tmp19 = phi i1 [ false, %bb10 ], [ false, %bb ], [ %tmp17, %bb14 ]   ; constant false on both early-exit edges
+  %tmp20 = xor i1 %tmp19, true
+  %tmp21 = xor i1 %tmp20, true                    ; two negations: same truth value as %tmp19
+  %tmp22 = zext i1 %tmp21 to i32
+  %tmp23 = sext i32 %tmp22 to i64
+  %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 1)   ; expected 1 differs from the constant false edges, so all three branches get the same weights
+  %tmp25 = icmp ne i64 %tmp24, 0
+  br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26:                                             ; preds = %bb18
+  %tmp27 = call i32  @goo()
+  br label %bb30
+
+bb28:                                             ; preds = %bb18
+  %tmp29 = call i32  @hoo()
+  br label %bb30
+
+bb30:                                             ; preds = %bb28, %bb26
+  ret void
+}
+
+define void @foo2(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo2
+bb:
+  %tmp8 = call i32  @goo() 
+  %tmp9 = icmp sgt i32 %tmp8, %arg
+  br i1 %tmp9, label %bb10, label %bb18           ; must stay without profile metadata
+; CHECK:  br i1 %tmp9
+; CHECK-NOT: !prof
+
+bb10:                                             ; preds = %bb
+  %tmp12 = call i32  @hoo()
+  %tmp13 = icmp sgt i32 %arg1, %tmp12
+  br i1 %tmp13, label %bb14, label %bb18          ; must stay without profile metadata
+; CHECK: br i1 %tmp13
+; CHECK-NOT: !prof
+
+bb14:                                             ; preds = %bb10
+  %tmp16 = call i32 @too()
+  %tmp17 = icmp sgt i32 %arg2, %tmp16
+  br label %bb18
+
+bb18:                                             ; preds = %bb14, %bb10, %bb
+  %tmp19 = phi i1 [ false, %bb10 ], [ false, %bb ], [ %tmp17, %bb14 ]   ; constant false on both early-exit edges
+  %tmp20 = xor i1 %tmp19, true
+  %tmp21 = xor i1 %tmp20, true
+  %tmp22 = zext i1 %tmp21 to i32
+  %tmp23 = sext i32 %tmp22 to i64
+  %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 0)   ; expected 0 agrees with the constant false edges, so only the final branch is weighted
+  %tmp25 = icmp ne i64 %tmp24, 0
+  br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT2:![0-9]+]]
+
+bb26:                                             ; preds = %bb18
+  %tmp27 = call i32 @goo()
+  br label %bb30
+
+bb28:                                             ; preds = %bb18
+  %tmp29 = call i32 @hoo()
+  br label %bb30
+
+bb30:                                             ; preds = %bb28, %bb26
+  ret void
+}
+
+define void @foo_i32(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo_i32
+bb:
+  %tmp8 = call i32  @goo() 
+  %tmp9 = icmp sgt i32 %tmp8, %arg
+  br i1 %tmp9, label %bb10, label %bb18           ; expected to be weighted like the final branch
+; CHECK: !prof [[WEIGHT]]
+
+bb10:                                             ; preds = %bb
+  %tmp12 = call i32 @hoo()
+  %tmp13 = icmp sgt i32 %arg1, %tmp12
+  br i1 %tmp13, label %bb14, label %bb18          ; expected to be weighted like the final branch
+; CHECK: br i1 %tmp13, {{.*}}!prof [[WEIGHT]]
+
+bb14:                                             ; preds = %bb10
+  %tmp16 = call i32 @too()
+  %tmp17 = icmp sgt i32 %arg2, %tmp16
+  br label %bb18
+
+bb18:                                             ; preds = %bb14, %bb10, %bb
+  %tmp19 = phi i32 [ 5, %bb10 ], [ 5, %bb ], [ %tmp16, %bb14 ]   ; i32 phi: constant 5 on both early-exit edges
+  %tmp23 = sext i32 %tmp19 to i64
+  %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4)   ; expected 4 differs from the constant 5
+  %tmp25 = icmp ne i64 %tmp24, 0
+  br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26:                                             ; preds = %bb18
+  %tmp27 = call i32 @goo()
+  br label %bb30
+
+bb28:                                             ; preds = %bb18
+  %tmp29 = call i32 @hoo()
+  br label %bb30
+
+bb30:                                             ; preds = %bb28, %bb26
+  ret void
+}
+
+
+define void @foo_i32_not_unlikely(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3)  {
+; CHECK-LABEL: void @foo_i32_not_unlikely
+bb:
+  %tmp8 = call i32 @goo() 
+  %tmp9 = icmp sgt i32 %tmp8, %arg
+  br i1 %tmp9, label %bb10, label %bb18           ; must stay without profile metadata
+; CHECK: br i1 %tmp9
+; CHECK-NOT: !prof
+
+bb10:                                             ; preds = %bb
+  %tmp12 = call i32 @hoo()
+  %tmp13 = icmp sgt i32 %arg1, %tmp12
+  br i1 %tmp13, label %bb14, label %bb18          ; must stay without profile metadata
+; CHECK: br i1 %tmp13
+; CHECK-NOT: !prof
+
+bb14:                                             ; preds = %bb10
+  %tmp16 = call i32  @too()
+  %tmp17 = icmp sgt i32 %arg2, %tmp16
+  br label %bb18
+
+bb18:                                             ; preds = %bb14, %bb10, %bb
+  %tmp19 = phi i32 [ 4, %bb10 ], [ 4, %bb ], [ %tmp16, %bb14 ]   ; constant 4 on both early-exit edges
+  %tmp23 = sext i32 %tmp19 to i64
+  %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4)   ; expected 4 equals the constant 4, so the early branches are not unlikely
+  %tmp25 = icmp ne i64 %tmp24, 0
+  br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26:                                             ; preds = %bb18
+  %tmp27 = call i32  @goo()
+  br label %bb30
+
+bb28:                                             ; preds = %bb18
+  %tmp29 = call i32 @hoo()
+  br label %bb30
+
+bb30:                                             ; preds = %bb28, %bb26
+  ret void
+}
+
+define void @foo_i32_xor(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3)  {
+; CHECK-LABEL: void @foo_i32_xor
+bb:
+  %tmp8 = call i32  @goo() 
+  %tmp9 = icmp sgt i32 %tmp8, %arg
+  br i1 %tmp9, label %bb10, label %bb18           ; expected to be weighted like the final branch
+; CHECK: br i1 %tmp9,{{.*}}!prof [[WEIGHT]]
+
+bb10:                                             ; preds = %bb
+  %tmp12 = call i32  @hoo()
+  %tmp13 = icmp sgt i32 %arg1, %tmp12
+  br i1 %tmp13, label %bb14, label %bb18          ; expected to be weighted like the final branch
+; CHECK: br i1 %tmp13,{{.*}}!prof [[WEIGHT]]
+
+bb14:                                             ; preds = %bb10
+  %tmp16 = call i32  @too()
+  %tmp17 = icmp sgt i32 %arg2, %tmp16
+  br label %bb18
+
+bb18:                                             ; preds = %bb14, %bb10, %bb
+  %tmp19 = phi i32 [ 6, %bb10 ], [ 6, %bb ], [ %tmp16, %bb14 ]   ; constant 6 on both early-exit edges
+  %tmp20 = xor i32 %tmp19, 3                      ; 6 xor 3 = 5 for the constant edges
+  %tmp23 = sext i32 %tmp20 to i64
+  %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4)   ; expected 4 differs from 5
+  %tmp25 = icmp ne i64 %tmp24, 0
+  br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26:                                             ; preds = %bb18
+  %tmp27 = call i32 @goo()
+  br label %bb30
+
+bb28:                                             ; preds = %bb18
+  %tmp29 = call i32 @hoo()
+  br label %bb30
+bb30:                                             ; preds = %bb28, %bb26
+  ret void
+}
+
+define void @foo_i8_sext(i32 %arg, i32 %arg1, i8 %arg2, i32 %arg3)  {
+; CHECK-LABEL: void @foo_i8_sext
+bb:
+  %tmp8 = call i32  @goo() 
+  %tmp9 = icmp sgt i32 %tmp8, %arg
+  br i1 %tmp9, label %bb10, label %bb18           ; expected to be weighted like the final branch
+; CHECK: br i1 %tmp9,{{.*}}!prof [[WEIGHT]]
+
+bb10:                                             ; preds = %bb
+  %tmp12 = call i32  @hoo()
+  %tmp13 = icmp sgt i32 %arg1, %tmp12
+  br i1 %tmp13, label %bb14, label %bb18          ; expected to be weighted like the final branch
+; CHECK: br i1 %tmp13,{{.*}}!prof [[WEIGHT]]
+
+bb14:                                             ; preds = %bb10
+  %tmp16 = call i8  @too8()
+  %tmp17 = icmp sgt i8 %arg2, %tmp16
+  br label %bb18
+
+bb18:                                             ; preds = %bb14, %bb10, %bb
+  %tmp19 = phi i8 [ 255, %bb10 ], [ 255, %bb ], [ %tmp16, %bb14 ]   ; i8 phi: constant 255 on both early-exit edges
+  %tmp23 = sext i8 %tmp19 to i64                  ; the widening is a sign extension, which is what this case exercises
+; after sign extension, the operand value becomes -1 which does not match 255
+  %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 255)
+  %tmp25 = icmp ne i64 %tmp24, 0
+  br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26:                                             ; preds = %bb18
+  %tmp27 = call i32 @goo()
+  br label %bb30
+
+bb28:                                             ; preds = %bb18
+  %tmp29 = call i32 @hoo()
+  br label %bb30
+bb30:                                             ; preds = %bb28, %bb26
+  ret void
+}
+
+define void @foo_i8_sext_not_unlikely(i32 %arg, i32 %arg1, i8 %arg2, i32 %arg3)  {
+; CHECK-LABEL: void @foo_i8_sext_not_unlikely
+bb:
+  %tmp8 = call i32  @goo() 
+  %tmp9 = icmp sgt i32 %tmp8, %arg
+  br i1 %tmp9, label %bb10, label %bb18           ; must stay without profile metadata
+; CHECK: br i1 %tmp9
+; CHECK-NOT: !prof
+
+bb10:                                             ; preds = %bb
+  %tmp12 = call i32  @hoo()
+  %tmp13 = icmp sgt i32 %arg1, %tmp12
+  br i1 %tmp13, label %bb14, label %bb18          ; must stay without profile metadata
+; CHECK: br i1 %tmp13
+; CHECK-NOT: !prof
+
+bb14:                                             ; preds = %bb10
+  %tmp16 = call i8  @too8()
+  %tmp17 = icmp sgt i8 %arg2, %tmp16
+  br label %bb18
+
+bb18:                                             ; preds = %bb14, %bb10, %bb
+  %tmp19 = phi i8 [ 255, %bb10 ], [ 255, %bb ], [ %tmp16, %bb14 ]   ; i8 phi: constant 255 on both early-exit edges
+  %tmp23 = sext i8 %tmp19 to i64                  ; the widening is a sign extension, which is what this case exercises
+; after sign extension, the operand value becomes -1 which matches -1
+  %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 -1)
+  %tmp25 = icmp ne i64 %tmp24, 0
+  br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26:                                             ; preds = %bb18
+  %tmp27 = call i32 @goo()
+  br label %bb30
+
+bb28:                                             ; preds = %bb18
+  %tmp29 = call i32 @hoo()
+  br label %bb30
+bb30:                                             ; preds = %bb28, %bb26
+  ret void
+}
+
+
+define void @foo_i32_xor_not_unlikely(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3)  {
+; CHECK-LABEL: void @foo_i32_xor_not_unlikely
+bb:
+  %tmp8 = call i32 @goo() 
+  %tmp9 = icmp sgt i32 %tmp8, %arg
+  br i1 %tmp9, label %bb10, label %bb18           ; must stay without profile metadata
+; CHECK: br i1 %tmp9
+; CHECK-NOT: !prof
+
+bb10:                                             ; preds = %bb
+  %tmp12 = call i32  @hoo()
+  %tmp13 = icmp sgt i32 %arg1, %tmp12
+  br i1 %tmp13, label %bb14, label %bb18          ; must stay without profile metadata
+; CHECK: br i1 %tmp13
+; CHECK-NOT: !prof
+
+bb14:                                             ; preds = %bb10
+  %tmp16 = call i32 @too()
+  %tmp17 = icmp sgt i32 %arg2, %tmp16
+  br label %bb18
+
+bb18:                                             ; preds = %bb14, %bb10, %bb
+  %tmp19 = phi i32 [ 6, %bb10 ], [ 6, %bb ], [ %tmp16, %bb14 ]   ; constant 6 on both early-exit edges
+  %tmp20 = xor i32 %tmp19, 2                      ; 6 xor 2 = 4 for the constant edges
+  %tmp23 = sext i32 %tmp20 to i64
+  %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4)   ; expected 4 equals the folded constant 4, so the early branches are not unlikely
+  %tmp25 = icmp ne i64 %tmp24, 0
+  br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26:                                             ; preds = %bb18
+  %tmp27 = call i32 @goo()
+  br label %bb30
+
+bb28:                                             ; preds = %bb18
+  %tmp29 = call i32  @hoo()
+  br label %bb30
+
+bb30:                                             ; preds = %bb28, %bb26
+  ret void
+}
+
+declare i32 @goo()
+
+declare i32 @hoo()
+
+declare i32 @too()
+
+declare i8 @too8()
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.expect.i64(i64, i64) 
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 302965)"}
+; CHECK: [[WEIGHT]] = !{!"branch_weights", i32 2000, i32 1}
+; CHECK: [[WEIGHT2]] = !{!"branch_weights", i32 1, i32 2000}

Added: llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_or.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_or.ll (added)
+++ llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_or.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,103 @@
+; RUN: opt -lower-expect  -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s
+; 
+; if (__builtin_expect((x > goo() || y > hoo()), 1)) {
+;  ..
+; }
+; For the above case, only the second branch should be
+; annotated.
+; if (__builtin_expect((x > goo() || y > hoo()), 0)) {
+;  ..
+; }
+; For the above case, two branches should be annotated.
+; Function Attrs: noinline nounwind uwtable
+define void @foo(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3)  {
+; CHECK-LABEL: void @foo
+bb:                                               ; entry: first operand of the '||'
+  %tmp8 = call i32 @goo()
+  %tmp9 = icmp slt i32 %arg, %tmp8
+  br i1 %tmp9, label %bb14, label %bb10           ; short-circuit edge; must stay without !prof
+; CHECK: br i1 %tmp9
+; CHECK-NOT: !prof
+
+bb10:                                             ; preds = %bb
+  %tmp12 = call i32  @hoo()
+  %tmp13 = icmp sgt i32 %arg1, %tmp12
+  br label %bb14
+
+bb14:                                             ; preds = %bb10, %bb
+  %tmp15 = phi i1 [ true, %bb ], [ %tmp13, %bb10 ]
+  %tmp16 = zext i1 %tmp15 to i32
+  %tmp17 = sext i32 %tmp16 to i64
+  %expect = call i64 @llvm.expect.i64(i64 %tmp17, i64 1) ; expected value is 1
+  %tmp18 = icmp ne i64 %expect, 0
+  br i1 %tmp18, label %bb19, label %bb21          ; the only branch that gets weights
+; CHECK: br i1 %tmp18{{.*}}!prof [[WEIGHT:![0-9]+]]
+
+bb19:                                             ; preds = %bb14
+  %tmp20 = call i32 @goo()
+  br label %bb23
+
+bb21:                                             ; preds = %bb14
+  %tmp22 = call i32  @hoo()
+  br label %bb23
+
+bb23:                                             ; preds = %bb21, %bb19
+  ret void
+}
+
+define void @foo2(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3)  {
+; CHECK-LABEL: void @foo2
+bb:
+  %tmp = alloca i32, align 4
+  %tmp4 = alloca i32, align 4
+  %tmp5 = alloca i32, align 4
+  %tmp6 = alloca i32, align 4
+  store i32 %arg, i32* %tmp, align 4
+  store i32 %arg1, i32* %tmp4, align 4
+  store i32 %arg2, i32* %tmp5, align 4
+  store i32 %arg3, i32* %tmp6, align 4
+  %tmp7 = load i32, i32* %tmp, align 4
+  %tmp8 = call i32  @goo()
+  %tmp9 = icmp slt i32 %tmp7, %tmp8
+  br i1 %tmp9, label %bb14, label %bb10
+; CHECK: br i1 %tmp9{{.*}}!prof [[WEIGHT2:![0-9]+]]
+
+bb10:                                             ; preds = %bb
+  %tmp11 = load i32, i32* %tmp5, align 4
+  %tmp12 = call i32 @hoo()
+  %tmp13 = icmp sgt i32 %tmp11, %tmp12
+  br label %bb14
+
+bb14:                                             ; preds = %bb10, %bb
+  %tmp15 = phi i1 [ true, %bb ], [ %tmp13, %bb10 ]
+  %tmp16 = zext i1 %tmp15 to i32
+  %tmp17 = sext i32 %tmp16 to i64
+  %expect = call i64 @llvm.expect.i64(i64 %tmp17, i64 0)
+  %tmp18 = icmp ne i64 %expect, 0
+  br i1 %tmp18, label %bb19, label %bb21
+; CHECK: br i1 %tmp18{{.*}}!prof [[WEIGHT2]]
+
+bb19:                                             ; preds = %bb14
+  %tmp20 = call i32 @goo()
+  br label %bb23
+
+bb21:                                             ; preds = %bb14
+  %tmp22 = call i32 @hoo()
+  br label %bb23
+
+bb23:                                             ; preds = %bb21, %bb19
+  ret void
+}
+
+declare i32 @goo() 
+declare i32 @hoo() 
+declare i64 @llvm.expect.i64(i64, i64) 
+
+
+!llvm.ident = !{!0}
+
+
+!0 = !{!"clang version 5.0.0 (trunk 302965)"}
+; CHECK: [[WEIGHT]] = !{!"branch_weights", i32 2000, i32 1}
+; CHECK: [[WEIGHT2]] = !{!"branch_weights", i32 1, i32 2000}

Added: llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_tern.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_tern.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_tern.ll (added)
+++ llvm/trunk/test/Transforms/LowerExpectIntrinsic/phi_tern.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt -lower-expect  -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s
+
+; return __builtin_expect((a > b ? 1 : goo()), 0);
+;  
+; Function Attrs: noinline nounwind uwtable
+define i32 @foo(i32 %arg, i32 %arg1)  {
+; CHECK-LABEL: i32 @foo
+bb:                                               ; entry: ternary condition, true edge goes straight to %bb9
+  %tmp5 = icmp sgt i32 %arg, %arg1
+  br i1 %tmp5, label %bb9, label %bb7
+; CHECK: br i1 %tmp5{{.*}}!prof [[WEIGHT:![0-9]+]]
+
+bb7:                                              ; preds = %bb
+  %tmp8 = call i32 @goo()
+  br label %bb9
+
+bb9:                                              ; preds = %bb7, %bb
+  %tmp10 = phi i32 [ 1, %bb ], [ %tmp8, %bb7 ]
+  %tmp11 = sext i32 %tmp10 to i64
+  %expect = call i64 @llvm.expect.i64(i64 %tmp11, i64 0) ; expected value is 0
+  %tmp12 = trunc i64 %expect to i32
+  ret i32 %tmp12
+}
+
+define i32 @foo2(i32 %arg, i32 %arg1)  {
+bb:                                               ; entry: same ternary as @foo, true edge via %bb6
+  %tmp5 = icmp sgt i32 %arg, %arg1
+  br i1 %tmp5, label %bb6, label %bb7
+; CHECK: br i1 %tmp5{{.*}}!prof [[WEIGHT]]
+
+bb6:                                              ; preds = %bb
+  br label %bb9
+
+bb7:                                              ; preds = %bb
+  %tmp8 = call i32 @goo()
+  br label %bb9
+
+bb9:                                              ; preds = %bb7, %bb6
+  %tmp10 = phi i32 [ 1, %bb6 ], [ %tmp8, %bb7 ]
+  %tmp11 = sext i32 %tmp10 to i64
+  %expect = call i64 @llvm.expect.i64(i64 %tmp11, i64 0) ; expected value is 0
+  %tmp12 = trunc i64 %expect to i32
+  ret i32 %tmp12
+}
+
+declare i32 @goo() 
+declare i64 @llvm.expect.i64(i64, i64) 
+
+
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 302965)"}
+
+; CHECK: [[WEIGHT]] = !{!"branch_weights", i32 1, i32 2000}

Added: llvm/trunk/test/Transforms/LowerGuardIntrinsic/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerGuardIntrinsic/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerGuardIntrinsic/basic.ll (added)
+++ llvm/trunk/test/Transforms/LowerGuardIntrinsic/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,77 @@
+; RUN: opt -S -lower-guard-intrinsic < %s | FileCheck %s
+; RUN: opt -S -passes='lower-guard-intrinsic' < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define i8 @f_basic(i1* %c_ptr) {
+; CHECK-LABEL: @f_basic(
+
+  %c = load volatile i1, i1* %c_ptr
+  call void(i1, ...) @llvm.experimental.guard(i1 %c, i32 1) [ "deopt"(i32 1) ]
+  ret i8 5
+
+; CHECK:  br i1 %c, label %guarded, label %deopt, !prof !0
+; CHECK: deopt:
+; CHECK-NEXT:  %deoptcall = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i32 1) ]
+; CHECK-NEXT:  ret i8 %deoptcall
+; CHECK: guarded:
+; CHECK-NEXT:  ret i8 5
+}
+
+define void @f_void_return_ty(i1* %c_ptr) {
+; CHECK-LABEL: @f_void_return_ty(
+
+  %c = load volatile i1, i1* %c_ptr
+  call void(i1, ...) @llvm.experimental.guard(i1 %c, i32 1) [ "deopt"() ]
+  ret void
+
+; CHECK:  br i1 %c, label %guarded, label %deopt, !prof !0
+; CHECK: deopt:
+; CHECK-NEXT:  call void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"() ]
+; CHECK-NEXT:  ret void
+; CHECK: guarded:
+; CHECK-NEXT:  ret void
+}
+
+define void @f_multiple_args(i1* %c_ptr) {
+; CHECK-LABEL: @f_multiple_args(
+
+  %c = load volatile i1, i1* %c_ptr
+  call void(i1, ...) @llvm.experimental.guard(i1 %c, i32 1, i32 2, double 500.0) [ "deopt"(i32 2, i32 3) ]
+  ret void
+
+; CHECK: br i1 %c, label %guarded, label %deopt, !prof !0
+; CHECK: deopt:
+; CHECK-NEXT:  call void (...) @llvm.experimental.deoptimize.isVoid(i32 1, i32 2, double 5.000000e+02) [ "deopt"(i32 2, i32 3) ]
+; CHECK-NEXT:  ret void
+; CHECK: guarded:
+; CHECK-NEXT:  ret void
+}
+
+define i32 @f_zero_args(i1* %c_ptr) {
+; CHECK-LABEL: @f_zero_args(
+  %c = load volatile i1, i1* %c_ptr
+  call void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"(i32 2, i32 3) ]
+  ret i32 500
+
+; CHECK: br i1 %c, label %guarded, label %deopt, !prof !0
+; CHECK: deopt:
+; CHECK-NEXT:  %deoptcall = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 2, i32 3) ]
+; CHECK-NEXT:  ret i32 %deoptcall
+; CHECK: guarded:
+; CHECK-NEXT:  ret i32 500
+}
+
+define i8 @f_with_make_implicit_md(i32* %ptr) {
+; CHECK-LABEL: @f_with_make_implicit_md(
+; CHECK:  br i1 %notNull, label %guarded, label %deopt, !prof !0, !make.implicit !1
+; CHECK: deopt:
+; CHECK-NEXT:  %deoptcall = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i32 1) ]
+; CHECK-NEXT:  ret i8 %deoptcall
+
+  %notNull = icmp ne i32* %ptr, null
+  call void(i1, ...) @llvm.experimental.guard(i1 %notNull, i32 1) [ "deopt"(i32 1) ], !make.implicit !{}
+  ret i8 5
+}
+
+!0 = !{!"branch_weights", i32 1048576, i32 1}

Added: llvm/trunk/test/Transforms/LowerGuardIntrinsic/with-calling-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerGuardIntrinsic/with-calling-conv.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerGuardIntrinsic/with-calling-conv.ll (added)
+++ llvm/trunk/test/Transforms/LowerGuardIntrinsic/with-calling-conv.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt -S -lower-guard-intrinsic < %s | FileCheck %s
+
+declare cc99 void @llvm.experimental.guard(i1, ...)
+
+define i8 @f_basic(i1* %c_ptr) {
+; CHECK-LABEL: @f_basic(
+; CHECK:  br i1 %c, label %guarded, label %deopt
+; CHECK: deopt:
+; CHECK-NEXT:  %deoptcall = call cc99 i8 (...) @llvm.experimental.deoptimize.i8() [ "deopt"() ]
+; CHECK-NEXT:  ret i8 %deoptcall
+
+  %c = load volatile i1, i1* %c_ptr
+  call cc99 void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+  ret i8 6
+}

Added: llvm/trunk/test/Transforms/LowerInvoke/2003-12-10-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerInvoke/2003-12-10-Crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerInvoke/2003-12-10-Crash.ll (added)
+++ llvm/trunk/test/Transforms/LowerInvoke/2003-12-10-Crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; This testcase was reduced from Shootout-C++/reversefile.cpp by bugpoint
+
+; RUN: opt < %s -lowerinvoke -disable-output
+
+declare void @baz()
+
+declare void @bar()
+
+define void @foo() personality i32 (...)* @__gxx_personality_v0 {
+then:
+	invoke void @baz( )
+			to label %invoke_cont.0 unwind label %try_catch
+invoke_cont.0:		; preds = %then
+	invoke void @bar( )
+			to label %try_exit unwind label %try_catch
+try_catch:		; preds = %invoke_cont.0, %then
+	%__tmp.0 = phi i32* [ null, %invoke_cont.0 ], [ null, %then ]		; <i32*> [#uses=0]
+  %res = landingpad { i8* }
+          cleanup
+	ret void
+try_exit:		; preds = %invoke_cont.0
+	ret void
+}
+
+declare i32 @__gxx_personality_v0(...)

Added: llvm/trunk/test/Transforms/LowerInvoke/lowerinvoke.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerInvoke/lowerinvoke.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerInvoke/lowerinvoke.ll (added)
+++ llvm/trunk/test/Transforms/LowerInvoke/lowerinvoke.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt < %s -lowerinvoke -S | FileCheck %s
+; RUN: opt < %s -passes='lowerinvoke' -S | FileCheck %s
+
+declare i32 @external_func(i64 %arg)
+
+define i32 @invoke_test(i64 %arg) personality i8* null {
+entry:
+  %result = invoke fastcc i32 @external_func(i64 inreg %arg)
+      to label %cont unwind label %lpad
+cont:
+  ret i32 %result
+lpad:
+  %phi = phi i32 [ 99, %entry ]
+  %lp = landingpad { i8*, i32 } cleanup
+  ret i32 %phi
+}
+
+; The "invoke" should be converted to a "call".
+; CHECK-LABEL: define i32 @invoke_test
+; CHECK: %result = call fastcc i32 @external_func(i64 inreg %arg)
+; CHECK-NEXT: br label %cont
+
+; Note that this pass does not remove dead landingpad blocks.
+; CHECK: lpad:
+; CHECK-NOT: phi
+; CHECK: landingpad

Added: llvm/trunk/test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt < %s -lowerswitch
+
+define void @child(i32 %ct.1) {
+entry:
+	switch i32 0, label %return [
+		 i32 3, label %UnifiedExitNode
+		 i32 0, label %return
+	]
+return:		; preds = %entry, %entry
+	%result.0 = phi i32* [ null, %entry ], [ null, %entry ]		; <i32*> [#uses=0]
+	br label %UnifiedExitNode
+UnifiedExitNode:		; preds = %return, %entry
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,9 @@
+; RUN: opt < %s -lowerswitch
+
+define void @test() {
+	switch i32 0, label %Next [
+	]
+Next:		; preds = %0
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,19 @@
+; RUN: opt < %s -lowerswitch -disable-output
+
+define void @solve() {
+entry:
+	%targetBlock = call i16 @solve_code( )		; <i16> [#uses=1]
+	br label %codeReplTail
+then.1:		; preds = %codeReplTail
+	ret void
+loopexit.0:		; preds = %codeReplTail
+	ret void
+codeReplTail:		; preds = %codeReplTail, %entry
+	switch i16 %targetBlock, label %codeReplTail [
+		 i16 0, label %loopexit.0
+		 i16 1, label %then.1
+	]
+}
+
+declare i16 @solve_code()
+

Added: llvm/trunk/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+; CHECK-NOT: icmp eq i32 %0, 1
+
+define i32 @foo(i32 %a) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  switch i32 %0, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+  ]
+
+sw.bb:
+  ret i32 12
+
+sw.bb1:
+  ret i32 4
+
+sw.bb2:
+  ret i32 2
+
+sw.default:
+  ret i32 9
+}

Added: llvm/trunk/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+;
+; The switch is lowered with a single icmp.
+; CHECK: icmp
+; CHECK-NOT: icmp
+;
+;int foo(int a) {
+;
+;  switch (a) {
+;  case 0:
+;    return 10;
+;  case 1:
+;    return 3;
+;  default:
+;    __builtin_unreachable();
+;  }
+;
+;}
+
+define i32 @foo(i32 %a) {
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  store i32 %a, i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
+  switch i32 %3, label %6 [
+    i32 0, label %4
+    i32 1, label %5
+  ]
+
+; <label>:4 
+  store i32 10, i32* %1
+  br label %7
+
+; <label>:5
+  store i32 3, i32* %1
+  br label %7
+
+; <label>:6
+  unreachable
+
+; <label>:7
+  %8 = load i32, i32* %1
+  ret i32 %8
+}

Added: llvm/trunk/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/2014-06-23-PHIlowering.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+
+define i32 @test(i32 %arg) #0 {
+; CHECK-LABEL: @test
+; CHECK: 2:
+; CHECK-NEXT:  %res.0 = phi i32 [ 1, %NodeBlock ], [ 2, %1 ]
+; CHECK-NEXT:  br label %3
+; CHECK: 5:
+; CHECK-NEXT:   %res.3 = phi i32 [ 0, %NewDefault ], [ %res.2, %4 ]
+; CHECK-NEXT:   %6 = add nsw i32 %res.3, 1
+; CHECK-NEXT:   ret i32 %6
+
+  switch i32 %arg, label %5 [
+    i32 1, label %1
+    i32 2, label %2
+    i32 3, label %3
+    i32 4, label %4
+  ]
+
+1:
+  br label %2
+
+2:
+  %res.0 = phi i32 [ 1, %0 ], [ 2, %1 ]
+  br label %3
+
+3:
+  %res.1 = phi i32 [ 0, %0 ], [ %res.0, %2 ]
+  %phitmp = add nsw i32 %res.1, 2
+  br label %4
+
+4:
+  %res.2 = phi i32 [ 1, %0 ], [ %phitmp, %3 ]
+  br label %5
+
+5:
+  %res.3 = phi i32 [ 0, %0 ], [ %res.2, %4 ]
+  %6 = add nsw i32 %res.3, 1
+  ret i32 %6
+}

Added: llvm/trunk/test/Transforms/LowerSwitch/delete-default-block-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/delete-default-block-crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/delete-default-block-crash.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/delete-default-block-crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt < %s -lowerswitch -disable-output
+
+; This test verifies that -lowerswitch does not crash after deleting the default block.
+
+declare i32 @f(i32)
+
+define i32 @unreachable(i32 %x) {
+
+entry:
+  switch i32 %x, label %unreachable [
+    i32 5, label %a
+    i32 6, label %a
+    i32 7, label %a
+    i32 10, label %b
+    i32 20, label %b
+    i32 30, label %b
+    i32 40, label %b
+  ]
+unreachable:
+  unreachable
+a:
+  %0 = call i32 @f(i32 0)
+  ret i32 %0
+b:
+  %1 = call i32 @f(i32 1)
+  ret i32 %1
+}

Added: llvm/trunk/test/Transforms/LowerSwitch/do-not-handle-impossible-values.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/do-not-handle-impossible-values.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/do-not-handle-impossible-values.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/do-not-handle-impossible-values.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,895 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+
+; Check that we do not generate redundant comparisons that would have results
+; known at compile time due to limited range of the value being switch'ed over.
+define i32 @test1(i32 %val) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[VAL:%.*]] to i2
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i2 [[TRUNC]], 1
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[LEAFBLOCK:%.*]], label [[CASE_1:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i2 [[TRUNC]], -2
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_D:%.*]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[RESD:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %trunc = trunc i32 %val to i2
+  switch i2 %trunc, label %case.D [
+  i2 1, label %case.1  ; i2  1
+  i2 2, label %case.2  ; i2 -2
+  ]
+  ; It's known that %trunc (an i2) cannot be less than -2 or greater than 1
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %resD = call i32 @caseD()
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Check that we do not generate redundant comparisons that would have results
+; known at compile time due to limited range of the value being switch'ed over.
+define i32 @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @getVal(), !range !0
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i32 [[VAL]], 2
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[CASE_1:%.*]], label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[VAL]], 2
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_D:%.*]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[RESD:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %val = call i32 @getVal(), !range !0
+  switch i32 %val, label %case.D [
+  i32 1, label %case.1
+  i32 2, label %case.2
+  ]
+  ; It's known that %val can not be less than 1
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %resD = call i32 @caseD()
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Corner case:
+; 1) some of the non-default cases are unreachable due to the !range constraint,
+; 2) the default case is unreachable as non-default cases cover the range fully.
+define i32 @test3() {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @getVal(), !range !1
+; CHECK-NEXT:    br label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[VAL]], 2
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_1:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %val = call i32 @getVal(), !range !1
+  switch i32 %val, label %case.D [
+  i32 1, label %case.1
+  i32 2, label %case.2
+  i32 3, label %case.1
+  ]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %resD = call i32 @caseD()
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Corner case:
+; 1) some of the non-default cases are unreachable due to the !range constraint,
+; 2) the default case is still reachable as non-default cases do not cover the
+;    range fully.
+define i32 @test4() {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @getVal(), !range !2
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i32 [[VAL]], 2
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[CASE_1:%.*]], label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[VAL]], 2
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_D:%.*]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[RESD:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %val = call i32 @getVal(), !range !2
+  switch i32 %val, label %case.D [
+  i32 1, label %case.1
+  i32 2, label %case.2
+  ]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %resD = call i32 @caseD()
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Corner case:
+; 1) some of the non-default cases are unreachable due to the !range constraint,
+; 2) the default case appears to be unreachable as non-default cases cover the
+;    range fully, but its basic block actually is reachable from the switch via
+;    one of the non-default cases.
+define i32 @test5(i1 %cond) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[SWITCH:%.*]], label [[CASE_D:%.*]]
+; CHECK:       switch:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @getVal(), !range !1
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i32 [[VAL]], 3
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[LEAFBLOCK:%.*]], label [[CASE_1:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[VAL]], 1
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_1]], label [[NEWDEFAULT:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_D]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[DELTA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 20, [[NEWDEFAULT]] ]
+; CHECK-NEXT:    [[RESD_TMP:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    [[RESD:%.*]] = add i32 [[RESD_TMP]], [[DELTA]]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br i1 %cond, label %switch, label %case.D
+
+switch:
+  %val = call i32 @getVal(), !range !1
+  switch i32 %val, label %case.D [
+  i32 1, label %case.1
+  i32 2, label %case.D
+  i32 3, label %case.1
+  ]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.D:
+  %delta = phi i32 [ 0, %entry ], [ 20, %switch ], [ 20, %switch ]
+  %resD.tmp = call i32 @caseD()
+  %resD = add i32 %resD.tmp, %delta
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Corner case:
+; 1) some of the non-default cases are unreachable due to the !range constraint,
+; 2) the default case appears to be unreachable as non-default cases cover the
+;    range fully, but its basic block actually is reachable, though, from a
+;    different basic block, not the switch itself.
+define i32 @test6(i1 %cond) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[SWITCH:%.*]], label [[CASE_D:%.*]]
+; CHECK:       switch:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @getVal(), !range !1
+; CHECK-NEXT:    br label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[VAL]], 2
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_1:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[RESD_TMP:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    [[RESD:%.*]] = add i32 [[RESD_TMP]], 0
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br i1 %cond, label %switch, label %case.D
+
+switch:
+  %val = call i32 @getVal(), !range !1
+  switch i32 %val, label %case.D [
+  i32 1, label %case.1
+  i32 2, label %case.2
+  i32 3, label %case.1
+  ]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %delta = phi i32 [ 0, %entry ], [ 20, %switch ]
+  %resD.tmp = call i32 @caseD()
+  %resD = add i32 %resD.tmp, %delta
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Corner case:
+; 1) switch appears to have a non-empty set of non-default cases, but all of
+;    them reference the default case basic block.
+define i32 @test7(i1 %cond) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[SWITCH:%.*]], label [[CASE_D:%.*]]
+; CHECK:       switch:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @getVal(), !range !1
+; CHECK-NEXT:    br label [[CASE_D]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[DELTA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 20, [[SWITCH]] ]
+; CHECK-NEXT:    [[RESD_TMP:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    [[RESD:%.*]] = add i32 [[RESD_TMP]], [[DELTA]]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[RESD]]
+;
+entry:
+  br i1 %cond, label %switch, label %case.D
+
+switch:
+  %val = call i32 @getVal(), !range !1
+  switch i32 %val, label %case.D [
+  i32 2, label %case.D
+  ]
+
+case.D:
+  %delta = phi i32 [ 0, %entry ], [ 20, %switch ], [ 20, %switch ]
+  %resD.tmp = call i32 @caseD()
+  %resD = add i32 %resD.tmp, %delta
+  br label %exit
+
+exit:
+  ret i32 %resD
+}
+
+; Corner case:
+; 1) some of the non-default cases are unreachable due to the !range constraint,
+; 2) the default case appears to be unreachable as non-default cases cover the
+;    range fully, but its basic block actually is reachable from the switch via
+;    one of the non-default cases,
+; 3) such cases lie at the boundary of the range of values covered by
+;    non-default cases, and if removed, do not change the fact that the rest of
+;    the cases fully covers the value range.
+define i32 @test8(i1 %cond) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[SWITCH:%.*]], label [[CASE_D:%.*]]
+; CHECK:       switch:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @getVal(), !range !3
+; CHECK-NEXT:    br label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[VAL]], 2
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_1:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[RESD_TMP:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    [[RESD:%.*]] = add i32 [[RESD_TMP]], 0
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br i1 %cond, label %switch, label %case.D
+
+switch:
+  %val = call i32 @getVal(), !range !3
+  switch i32 %val, label %case.D [
+  i32 1, label %case.1
+  i32 2, label %case.2
+  i32 3, label %case.D
+  ]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %delta = phi i32 [ 0, %entry ], [ 20, %switch ], [ 20, %switch ]
+  %resD.tmp = call i32 @caseD()
+  %resD = add i32 %resD.tmp, %delta
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Corner case:
+; 1) the default case appears to be unreachable as non-default cases cover the
+;    range fully, but its basic block actually is reachable from the switch via
+;    more than one non-default case.
+define i32 @test9(i1 %cond, i2 %val) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[SWITCH:%.*]], label [[CASE_D:%.*]]
+; CHECK:       switch:
+; CHECK-NEXT:    br label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp sge i2 [[VAL:%.*]], 0
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_1:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_D]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[DELTA:%.*]] = phi i32 [ 20, [[NEWDEFAULT]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[RESD_TMP:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    [[RESD:%.*]] = add i32 [[RESD_TMP]], [[DELTA]]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br i1 %cond, label %switch, label %case.D
+
+switch:
+  switch i2 %val, label %case.D [
+  i2 0, label %case.1
+  i2 1, label %case.1
+  i2 2, label %case.D
+  i2 3, label %case.D
+  ]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.D:
+  %delta = phi i32 [20, %switch ], [ 20, %switch ], [ 20, %switch ], [ 0, %entry ]
+  %resD.tmp = call i32 @caseD()
+  %resD = add i32 %resD.tmp, %delta
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Check that we do not generate redundant comparisons that would have results
+; known at compile time due to limited range of the value being switch'ed over.
+define i32 @test10() {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @getVal()
+; CHECK-NEXT:    [[COND_LEFT:%.*]] = icmp sge i32 [[VAL]], 1
+; CHECK-NEXT:    [[COND_RIGHT:%.*]] = icmp sle i32 [[VAL]], 6
+; CHECK-NEXT:    [[COND:%.*]] = and i1 [[COND_LEFT]], [[COND_RIGHT]]
+; CHECK-NEXT:    br i1 [[COND]], label [[SWITCH:%.*]], label [[CASE_D:%.*]]
+; CHECK:       switch:
+; CHECK-NEXT:    br label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[VAL_OFF:%.*]] = add i32 [[VAL]], -3
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp ule i32 [[VAL_OFF]], 1
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_1:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       case.D:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ], [ 0, [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %val = call i32 @getVal()
+  %cond.left = icmp sge i32 %val, 1
+  %cond.right = icmp sle i32 %val, 6
+  %cond = and i1 %cond.left, %cond.right
+  br i1 %cond, label %switch, label %case.D
+
+switch:
+  switch i32 %val, label %case.D [
+  i32 1, label %case.1
+  i32 2, label %case.1
+  i32 3, label %case.2
+  i32 4, label %case.2
+  i32 5, label %case.1
+  i32 6, label %case.1
+  ]
+  ; It's known that %val <- [1, 6]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %resD = phi i32 [ 20, %switch ], [ 0, %entry ]
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Check that we do not generate redundant comparisons that would have results
+; known at compile time due to limited range of the value being switch'ed over.
+define i32 @test11() {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @getVal()
+; CHECK-NEXT:    [[VAL_ZEXT:%.*]] = zext i32 [[VAL]] to i64
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i64 [[VAL_ZEXT]], 1
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[CASE_1:%.*]], label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i64 [[VAL_ZEXT]], 1
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_D:%.*]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[RESD:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %val = call i32 @getVal()
+  %val.zext = zext i32 %val to i64
+  switch i64 %val.zext, label %case.D [
+  i64 0, label %case.1
+  i64 1, label %case.2
+  ]
+  ; It's known that %val.zext (a zext to i64) can not be less than 0
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %resD = call i32 @caseD()
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Check that we do not generate redundant comparisons that would have results
+; known at compile time due to limited range of the value being switch'ed over.
+define void @test12() {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i32 [[INDVAR]], 1
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[CASE_1:%.*]], label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[INDVAR]], 1
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    br label [[LATCH]]
+; CHECK:       case.2:
+; CHECK-NEXT:    br label [[LATCH]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[LATCH]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[INDVAR]], 1
+; CHECK-NEXT:    br i1 undef, label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvar = phi i32 [ 0, %entry ], [ %inc, %latch ]
+  switch i32 %indvar, label %latch [
+  i32 0, label %case.1
+  i32 1, label %case.2
+  ]
+  ; It's known that %indvar can not be less than 0
+
+case.1:
+  br label %latch
+
+case.2:
+  br label %latch
+
+latch:
+  %inc = add nuw nsw i32 %indvar, 1
+  br i1 undef, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+; Check that we do not generate redundant comparisons that would have results
+; known at compile time due to limited range of the value being switch'ed over.
+define void @test13(i32 %val) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = and i32 [[VAL:%.*]], 7
+; CHECK-NEXT:    br label [[BB33:%.*]]
+; CHECK:       bb33:
+; CHECK-NEXT:    br label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[TMP_OFF:%.*]] = add i32 [[TMP]], -2
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp ule i32 [[TMP_OFF]], 1
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[BB34:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       bb34:
+; CHECK-NEXT:    br label [[BB38:%.*]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[BB35:%.*]]
+; CHECK:       bb35:
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i32 [[TMP]], 6
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[LEAFBLOCK2:%.*]], label [[BB37:%.*]]
+; CHECK:       LeafBlock2:
+; CHECK-NEXT:    [[SWITCHLEAF3:%.*]] = icmp sle i32 [[TMP]], 1
+; CHECK-NEXT:    br i1 [[SWITCHLEAF3]], label [[BB37]], label [[NEWDEFAULT1:%.*]]
+; CHECK:       bb37:
+; CHECK-NEXT:    br label [[BB38]]
+; CHECK:       NewDefault1:
+; CHECK-NEXT:    br label [[BB38]]
+; CHECK:       bb38:
+; CHECK-NEXT:    br label [[BB33]]
+;
+entry:
+  %tmp = and i32 %val, 7
+  br label %bb33
+
+bb33:
+  switch i32 %tmp, label %bb35 [
+  i32 2, label %bb34
+  i32 3, label %bb34
+  ]
+
+bb34:
+  br label %bb38
+
+bb35:
+  switch i32 %tmp, label %bb38 [
+  i32 0, label %bb37
+  i32 1, label %bb37
+  i32 6, label %bb37
+  i32 7, label %bb37
+  ]
+  ; It's known that %tmp <- [0, 1] U [4, 7]
+
+bb37:
+  br label %bb38
+
+bb38:
+  br label %bb33
+}
+
+; Check that we do not generate redundant comparisons that would have results
+; known at compile time due to limited range of the value being switch'ed over.
+define i32 @test14() {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = call i32 @getVal(), !range !4
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP]])
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i32 [[VAL]], 1
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[CASE_1:%.*]], label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[VAL]], 1
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_D:%.*]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[RESD:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %tmp = call i32 @getVal(), !range !4
+  %val = call i32 @llvm.ctpop.i32(i32 %tmp)
+  switch i32 %val, label %case.D [
+  i32 0, label %case.1
+  i32 1, label %case.2
+  ]
+  ; It's known that %val <- [0, 2]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %resD = call i32 @caseD()
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Check that we do not generate redundant comparisons that would have results
+; known at compile time due to limited range of the value being switch'ed over.
+define i32 @test15() {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = call i32 @getVal()
+; CHECK-NEXT:    [[VAL:%.*]] = urem i32 [[TMP]], 3
+; CHECK-NEXT:    br label [[NODEBLOCK:%.*]]
+; CHECK:       NodeBlock:
+; CHECK-NEXT:    [[PIVOT:%.*]] = icmp slt i32 [[VAL]], 1
+; CHECK-NEXT:    br i1 [[PIVOT]], label [[CASE_1:%.*]], label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[VAL]], 1
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_D:%.*]]
+; CHECK:       case.D:
+; CHECK-NEXT:    [[RESD:%.*]] = call i32 @caseD()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ], [ [[RESD]], [[CASE_D]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %tmp = call i32 @getVal()
+  %val = urem i32 %tmp, 3
+  switch i32 %val, label %case.D [
+  i32 0, label %case.1
+  i32 1, label %case.2
+  ]
+  ; It's known that %val <- [0, 2]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %resD = call i32 @caseD()
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+; Check that we do not generate redundant comparisons that would have results
+; known at compile time due to limited range of the value being switch'ed over.
+define i32 @test16(float %f) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = fptosi float [[F:%.*]] to i64
+; CHECK-NEXT:    [[COND_LEFT:%.*]] = icmp slt i64 [[I]], 0
+; CHECK-NEXT:    [[CLAMP_LEFT:%.*]] = select i1 [[COND_LEFT]], i64 0, i64 [[I]]
+; CHECK-NEXT:    [[COND_RIGHT:%.*]] = icmp sgt i64 [[I]], 3
+; CHECK-NEXT:    [[CLAMP:%.*]] = select i1 [[COND_RIGHT]], i64 3, i64 [[CLAMP_LEFT]]
+; CHECK-NEXT:    br label [[LEAFBLOCK:%.*]]
+; CHECK:       LeafBlock:
+; CHECK-NEXT:    [[SWITCHLEAF:%.*]] = icmp sge i64 [[CLAMP]], 2
+; CHECK-NEXT:    br i1 [[SWITCHLEAF]], label [[CASE_2:%.*]], label [[NEWDEFAULT:%.*]]
+; CHECK:       NewDefault:
+; CHECK-NEXT:    br label [[CASE_1:%.*]]
+; CHECK:       case.1:
+; CHECK-NEXT:    [[RES1:%.*]] = call i32 @case1()
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       case.2:
+; CHECK-NEXT:    [[RES2:%.*]] = call i32 @case2()
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES1]], [[CASE_1]] ], [ [[RES2]], [[CASE_2]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %i = fptosi float %f to i64
+  %cond.left = icmp slt i64 %i, 0
+  %clamp.left = select i1 %cond.left, i64 0, i64 %i
+  %cond.right = icmp sgt i64 %i, 3
+  %clamp = select i1 %cond.right, i64 3, i64 %clamp.left
+  switch i64 %clamp, label %case.D [
+  i64 0, label %case.1
+  i64 1, label %case.1
+  i64 2, label %case.2
+  i64 3, label %case.2
+  ]
+  ; It's known that %clamp <- [0, 3]
+
+case.1:
+  %res1 = call i32 @case1()
+  br label %exit
+
+case.2:
+  %res2 = call i32 @case2()
+  br label %exit
+
+case.D:
+  %resD = call i32 @caseD()
+  br label %exit
+
+exit:
+  %res = phi i32 [ %res1, %case.1 ], [ %res2, %case.2 ], [ %resD, %case.D ]
+  ret i32 %res
+}
+
+declare i32 @case1()
+declare i32 @case2()
+declare i32 @caseD()
+declare i32 @getVal()
+declare i32 @llvm.ctpop.i32(i32)
+
+!0 = !{i32 1, i32 257}
+!1 = !{i32 2, i32 3}
+!2 = !{i32 2, i32 257}
+!3 = !{i32 1, i32 3}
+!4 = !{i32 0, i32 4}

Added: llvm/trunk/test/Transforms/LowerSwitch/feature.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/feature.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/feature.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/feature.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,104 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+
+; We have a switch on input.
+; On output we should get a binary comparison tree. Check that all is fine.
+
+;CHECK:     entry:
+;CHECK-NEXT:  br label %NodeBlock19
+
+;CHECK:     NodeBlock19:                                      ; preds = %entry
+;CHECK-NEXT:  %Pivot20 = icmp slt i32 %tmp158, 10
+;CHECK-NEXT:  br i1 %Pivot20, label %NodeBlock5, label %NodeBlock17
+
+;CHECK:     NodeBlock17:                                      ; preds = %NodeBlock19
+;CHECK-NEXT:  %Pivot18 = icmp slt i32 %tmp158, 13
+;CHECK-NEXT:  br i1 %Pivot18, label %NodeBlock9, label %NodeBlock15
+
+;CHECK:     NodeBlock15:                                      ; preds = %NodeBlock17
+;CHECK-NEXT:  %Pivot16 = icmp slt i32 %tmp158, 14
+;CHECK-NEXT:  br i1 %Pivot16, label %bb330, label %NodeBlock13
+
+;CHECK:     NodeBlock13:                                      ; preds = %NodeBlock15
+;CHECK-NEXT:  %Pivot14 = icmp slt i32 %tmp158, 15
+;CHECK-NEXT:  br i1 %Pivot14, label %bb332, label %LeafBlock11
+
+;CHECK:     LeafBlock11:                                      ; preds = %NodeBlock13
+;CHECK-NEXT:  %SwitchLeaf12 = icmp eq i32 %tmp158, 15
+;CHECK-NEXT:  br i1 %SwitchLeaf12, label %bb334, label %NewDefault
+
+;CHECK:     NodeBlock9:                                       ; preds = %NodeBlock17
+;CHECK-NEXT:  %Pivot10 = icmp slt i32 %tmp158, 11
+;CHECK-NEXT:  br i1 %Pivot10, label %bb324, label %NodeBlock7
+
+;CHECK:     NodeBlock7:                                       ; preds = %NodeBlock9
+;CHECK-NEXT:  %Pivot8 = icmp slt i32 %tmp158, 12
+;CHECK-NEXT:  br i1 %Pivot8, label %bb326, label %bb328
+
+;CHECK:     NodeBlock5:                                       ; preds = %NodeBlock19
+;CHECK-NEXT:  %Pivot6 = icmp slt i32 %tmp158, 7
+;CHECK-NEXT:  br i1 %Pivot6, label %NodeBlock, label %NodeBlock3
+
+;CHECK:     NodeBlock3:                                       ; preds = %NodeBlock5
+;CHECK-NEXT:  %Pivot4 = icmp slt i32 %tmp158, 8
+;CHECK-NEXT:  br i1 %Pivot4, label %bb, label %NodeBlock1
+
+;CHECK:     NodeBlock1:                                       ; preds = %NodeBlock3
+;CHECK-NEXT:  %Pivot2 = icmp slt i32 %tmp158, 9
+;CHECK-NEXT:  br i1 %Pivot2, label %bb338, label %bb322
+
+;CHECK:     NodeBlock:                                        ; preds = %NodeBlock5
+;CHECK-NEXT:  %Pivot = icmp slt i32 %tmp158, 0
+;CHECK-NEXT:  br i1 %Pivot, label %LeafBlock, label %bb338
+
+;CHECK:     LeafBlock:                                        ; preds = %NodeBlock
+;CHECK-NEXT:  %tmp158.off = add i32 %tmp158, 6
+;CHECK-NEXT:  %SwitchLeaf = icmp ule i32 %tmp158.off, 4
+;CHECK-NEXT:  br i1 %SwitchLeaf, label %bb338, label %NewDefault
+
+define i32 @main(i32 %tmp158) {
+entry:
+
+        switch i32 %tmp158, label %bb336 [
+                 i32 -2, label %bb338
+                 i32 -3, label %bb338
+                 i32 -4, label %bb338
+                 i32 -5, label %bb338
+                 i32 -6, label %bb338
+                 i32 0, label %bb338
+                 i32 1, label %bb338
+                 i32 2, label %bb338
+                 i32 3, label %bb338
+                 i32 4, label %bb338
+                 i32 5, label %bb338
+                 i32 6, label %bb338
+                 i32 7, label %bb
+                 i32 8, label %bb338
+                 i32 9, label %bb322
+                 i32 10, label %bb324
+                 i32 11, label %bb326
+                 i32 12, label %bb328
+                 i32 13, label %bb330
+                 i32 14, label %bb332
+                 i32 15, label %bb334
+        ]
+bb:
+  ret i32 2
+bb322:
+  ret i32 3
+bb324:
+  ret i32 4
+bb326:
+  ret i32 5
+bb328:
+  ret i32 6
+bb330:
+  ret i32 7
+bb332:
+  ret i32 8
+bb334:
+  ret i32 9
+bb336:
+  ret i32 10
+bb338:
+  ret i32 11
+}

Added: llvm/trunk/test/Transforms/LowerSwitch/fold-popular-case-to-unreachable-default.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/fold-popular-case-to-unreachable-default.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/fold-popular-case-to-unreachable-default.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/fold-popular-case-to-unreachable-default.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,110 @@
+; RUN: opt %s -lowerswitch -S | FileCheck %s
+
+define void @foo(i32 %x, i32* %p) {
+; Cases 2 and 4 are removed and become the new default case.
+; It is now enough to use two icmps to lower the switch.
+;
+; CHECK-LABEL: @foo
+; CHECK:       icmp slt i32 %x, 5
+; CHECK:       icmp eq i32 %x, 1
+; CHECK-NOT:   icmp
+;
+entry:
+  switch i32 %x, label %default [
+    i32 1, label %bb0
+    i32 2, label %popular
+    i32 4, label %popular
+    i32 5, label %bb1
+  ]
+bb0:
+  store i32 0, i32* %p
+  br label %exit
+bb1:
+  store i32 1, i32* %p
+  br label %exit
+popular:
+  store i32 2, i32* %p
+  br label %exit
+exit:
+  ret void
+default:
+  unreachable
+}
+
+define void @unreachable_gap(i64 %x, i32* %p) {
+; Cases 6 and INT64_MAX become the new default, but we still exploit the fact
+; that 3-4 is unreachable, so four icmps is enough.
+
+; CHECK-LABEL: @unreachable_gap
+; CHECK:       icmp slt i64 %x, 2
+; CHECK:       icmp slt i64 %x, 5
+; CHECK:       icmp eq  i64 %x, 5
+; CHECK:       icmp slt i64 %x, 1
+; CHECK-NOT:   icmp
+
+entry:
+  switch i64 %x, label %default [
+    i64 -9223372036854775808, label %bb0
+    i64 1, label %bb1
+    i64 2, label %bb2
+    i64 5, label %bb3
+    i64 6, label %bb4
+    i64 9223372036854775807, label %bb4
+  ]
+bb0:
+  store i32 0, i32* %p
+  br label %exit
+bb1:
+  store i32 1, i32* %p
+  br label %exit
+bb2:
+  store i32 2, i32* %p
+  br label %exit
+bb3:
+  store i32 3, i32* %p
+  br label %exit
+bb4:
+  store i32 4, i32* %p
+  br label %exit
+exit:
+  ret void
+default:
+  unreachable
+}
+
+
+
+define void @nocases(i32 %x, i32* %p) {
+; Don't fall over when there are no cases.
+;
+; CHECK-LABEL: @nocases
+; CHECK-LABEL: entry
+; CHECK-NEXT:  br label %default
+;
+entry:
+  switch i32 %x, label %default [
+  ]
+default:
+  unreachable
+}
+
+define void @nocasesleft(i32 %x, i32* %p) {
+; Cases 2 and 4 are removed and we are left with no cases.
+;
+; CHECK-LABEL: @nocasesleft
+; CHECK-LABEL: entry
+; CHECK-NEXT:  br label %popular
+;
+entry:
+  switch i32 %x, label %default [
+    i32 2, label %popular
+    i32 4, label %popular
+  ]
+popular:
+  store i32 2, i32* %p
+  br label %exit
+exit:
+  ret void
+default:
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LowerSwitch/phi-in-dead-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerSwitch/phi-in-dead-block.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerSwitch/phi-in-dead-block.ll (added)
+++ llvm/trunk/test/Transforms/LowerSwitch/phi-in-dead-block.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -S -lowerswitch %s | FileCheck %s
+
+; CHECK-LABEL: @phi_in_dead_block(
+; CHECK-NOT: switch
+define void @phi_in_dead_block() {
+bb:
+  br i1 undef, label %bb2, label %bb3
+
+bb1:                                              ; No predecessors!
+  switch i32 undef, label %bb2 [
+    i32 9, label %bb3
+  ]
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp = phi i64 [ undef, %bb1 ], [ undef, %bb ]
+  unreachable
+
+bb3:                                              ; preds = %bb1, %bb
+  unreachable
+}
+
+; CHECK-LABEL: @phi_in_dead_block_br_to_self(
+; CHECK-NOT: switch
+define void @phi_in_dead_block_br_to_self() {
+bb:
+  br i1 undef, label %bb2, label %bb3
+
+bb1:                                              ; No predecessors!
+  switch i32 undef, label %bb2 [
+    i32 9, label %bb3
+    i32 10, label %bb1
+  ]
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp = phi i64 [ undef, %bb1 ], [ undef, %bb ]
+  unreachable
+
+bb3:                                              ; preds = %bb1, %bb
+  unreachable
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/blockaddr-import.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/blockaddr-import.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/blockaddr-import.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/blockaddr-import.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,9 @@
+---
+TypeIdMap:
+  typeid1:
+    TTRes:
+      Kind:            AllOnes
+CfiFunctionDefs:
+  - m
+CfiFunctionDecls:
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/cfi-direct-call.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/cfi-direct-call.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/cfi-direct-call.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/cfi-direct-call.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,12 @@
+---
+TypeIdMap:
+  typeid1:
+    TTRes:
+      Kind:            AllOnes
+CfiFunctionDefs:
+  - internal_default_def
+  - internal_hidden_def
+  - dsolocal_default_def
+CfiFunctionDecls:
+  - external_decl
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/cfi-direct-call1.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/cfi-direct-call1.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/cfi-direct-call1.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/cfi-direct-call1.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,13 @@
+---
+TypeIdMap:
+  typeid1:
+    TTRes:
+      Kind:            AllOnes
+CfiFunctionDefs:
+  - local_func1
+  - local_func2
+  - local_func3
+CfiFunctionDecls:
+  - extern_decl
+  - extern_weak
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/export-icall.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/export-icall.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/export-icall.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/export-icall.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+---
+GlobalValueMap:
+  42:
+    - Live: true
+      # guid("f"), guid("f2"), guid("f3"), guid("g"), guid("h"), guid("external"), guid("external_weak")
+      Refs: [14740650423002898831, 8471399308421654326, 4197650231481825559, 13146401226427987378, 8124147457056772133, 5224464028922159466, 5227079976482001346]
+      TypeTests: [14276520915468743435, 15427464259790519041] # guid("typeid1"), guid("typeid2")
+  14740650423002898831: # guid("f")
+    - Linkage: 0 # external
+      Live: true
+  8471399308421654326: # guid("f2")
+    - Linkage: 0 # external
+      Live: true
+  4197650231481825559: # guid("f3")
+    - Linkage: 0 # external
+      Live: true
+  13146401226427987378: # guid("g")
+    - Linkage: 0 # external
+      Live: true
+  8124147457056772133: # guid("h")
+    - Linkage: 0 # external
+      Live: true
+  5224464028922159466: # guid("external")
+    - Linkage: 0 # external
+      Live: true
+  5227079976482001346: # guid("external_weak")
+    - Linkage: 9 # extern_weak
+      Live: true
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/exported-funcs.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/exported-funcs.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/exported-funcs.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/exported-funcs.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+---
+GlobalValueMap:
+  42:
+    - Live: true
+      Refs: [16594175687743574550, 2415377257478301385] # guid("external_addrtaken"), guid("external_addrtaken2")
+      TypeTests: [14276520915468743435, 15427464259790519041] # guid("typeid1"), guid("typeid2")
+  5224464028922159466: # guid("external")
+    - Linkage: 0 # external
+      Live: true
+  16430208882958242304: # guid("external2")
+    - Linkage: 0 # external
+      Live: true
+  16594175687743574550: # guid("external_addrtaken")
+    - Linkage: 0 # external
+      Live: true
+  2415377257478301385: # guid("external_addrtaken2")
+    - Linkage: 0 # external
+      Live: true
+  15859245615183425489: # guid("internal")
+    - Linkage: 7 # internal
+      Live: true
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-alias.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-alias.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-alias.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-alias.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+---
+TypeIdMap:
+  typeid1:
+    TTRes:
+      Kind:            AllOnes
+      SizeM1BitWidth:  7
+WithGlobalValueDeadStripping: false
+CfiFunctionDefs:
+  - f
+CfiFunctionDecls:
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-icall.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+---
+TypeIdMap:
+  typeid1:
+    TTRes:
+      Kind:            AllOnes
+      SizeM1BitWidth:  7
+  typeid2:
+    TTRes:
+      Kind:            Single
+      SizeM1BitWidth:  0
+WithGlobalValueDeadStripping: false
+CfiFunctionDefs:
+  - local_a
+  - local_b
+  - does_not_exist
+CfiFunctionDecls:
+  - external
+  - external_weak
+  - local_decl
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+---
+GlobalValueMap:
+  42:
+    - Live: true
+      TypeTests: [123]
+TypeIdMap:
+  typeid1:
+    TTRes:
+      Kind: Unsat
+      SizeM1BitWidth: 0
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/import.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+---
+TypeIdMap:
+  allones7:
+    TTRes:
+      Kind: AllOnes
+      SizeM1BitWidth: 7
+      AlignLog2: 1
+      SizeM1: 42
+  allones32:
+    TTRes:
+      Kind: AllOnes
+      SizeM1BitWidth: 32
+      AlignLog2: 2
+      SizeM1: 12345
+  bytearray7:
+    TTRes:
+      Kind: ByteArray
+      SizeM1BitWidth: 7
+      AlignLog2: 3
+      SizeM1: 43
+      BitMask: 64
+  bytearray32:
+    TTRes:
+      Kind: ByteArray
+      SizeM1BitWidth: 32
+      AlignLog2: 4
+      SizeM1: 12346
+      BitMask: 128
+  inline5:
+    TTRes:
+      Kind: Inline
+      SizeM1BitWidth: 5
+      AlignLog2: 5
+      SizeM1: 31
+      InlineBits: 123
+  inline6:
+    TTRes:
+      Kind: Inline
+      SizeM1BitWidth: 6
+      AlignLog2: 6
+      SizeM1: 63
+      InlineBits: 1000000000000
+  single:
+    TTRes:
+      Kind: Single
+      SizeM1BitWidth: 0
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,7 @@
+---
+GlobalValueMap:
+  42:
+    - Live: false
+      TypeTests: [14276520915468743435] # guid("typeid1")
+WithGlobalValueDeadStripping: true
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml Tue Apr 16 21:52:47 2019
@@ -0,0 +1,6 @@
+---
+GlobalValueMap:
+  42:
+    - Live: true
+      TypeTests: [14276520915468743435, 15427464259790519041] # guid("typeid1"), guid("typeid2")
+...

Added: llvm/trunk/test/Transforms/LowerTypeTests/blockaddr-import.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/blockaddr-import.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/blockaddr-import.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/blockaddr-import.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%p/Inputs/blockaddr-import.yaml %s -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux"
+
+declare i1 @llvm.type.test(i8*, metadata) #1
+declare !type !11 i32 @o(...)
+
+define hidden void @m() #0 !type !3 {
+entry:
+  br label %n
+n:
+  %call = tail call i32 (i8*, ...) bitcast (i32 (...)* @o to i32 (i8*, ...)*)(i8* blockaddress(@m, %n)) #4
+; Make sure that blockaddress refers to the new function, m.cfi
+; CHECK: define hidden void @m.cfi()
+; CHECK: blockaddress(@m.cfi, %n)
+
+  ret void
+}
+
+!3 = !{i64 0, !"_ZTSFvE"}
+!11 = !{i64 0, !"_ZTSFiE"}

Added: llvm/trunk/test/Transforms/LowerTypeTests/blockaddress-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/blockaddress-2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/blockaddress-2.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/blockaddress-2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt -S %s -lowertypetests | FileCheck %s
+
+; CHECK: @badfileops = internal global %struct.f { void ()* @bad_f, void ()* @bad_f }
+; CHECK: @bad_f = internal alias void (), void ()* @.cfi.jumptable
+; CHECK: define internal void @bad_f.cfi() !type !0 {
+; CHECK-NEXT:  ret void
+
+target triple = "x86_64-unknown-linux"
+
+%struct.f = type { void ()*, void ()* }
+@badfileops = internal global %struct.f { void ()* @bad_f, void ()* @bad_f }, align 8
+
+declare i1 @llvm.type.test(i8*, metadata)
+
+define internal void @bad_f() !type !1 {
+  ret void
+}
+
+define internal fastcc void @do_f() unnamed_addr !type !2 {
+  %1 = tail call i1 @llvm.type.test(i8* undef, metadata !"_ZTSFiP4fileP3uioP5ucrediP6threadE"), !nosanitize !3
+  ret void
+}
+
+!1 = !{i64 0, !"_ZTSFiP4fileP3uioP5ucrediP6threadE"}
+!2 = !{i64 0, !"_ZTSFiP6threadiP4fileP3uioliE"}
+!3 = !{}

Added: llvm/trunk/test/Transforms/LowerTypeTests/blockaddress.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/blockaddress.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/blockaddress.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/blockaddress.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt -S %s -lowertypetests | FileCheck %s
+
+
+; CHECK: define hidden i8* @f2.cfi() !type !0 {
+; CHECK-NEXT:  br label %b
+; CHECK: b:
+; CHECK-NEXT:  ret i8* blockaddress(@f2.cfi, %b)
+; CHECK-NEXT: }
+
+target triple = "x86_64-unknown-linux"
+
+define void @f1() {
+entry:
+  %0 = call i1 @llvm.type.test(i8* bitcast (i8* ()* @f2 to i8*), metadata !"_ZTSFvP3bioE")
+  ret void
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+
+define i8* @f2() !type !5 {
+  br label %b
+
+b:
+  ret i8* blockaddress(@f2, %b)
+}
+
+!5 = !{i64 0, !"_ZTSFvP3bioE"}

Added: llvm/trunk/test/Transforms/LowerTypeTests/cfi-direct-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/cfi-direct-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/cfi-direct-call.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/cfi-direct-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,60 @@
+; RUN: opt -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%p/Inputs/cfi-direct-call.yaml %s -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux"
+
+declare void @external_decl()
+declare void @external_nodecl()
+;declare void @internal_default_def()
+declare hidden void @internal_hidden_def()
+
+define i8 @local_a() {
+  call void @external_decl()
+  call void @external_nodecl()
+  call void @internal_default_def()
+  call void @internal_hidden_def()
+  call void @dsolocal_default_def()
+  call void @local_b()
+  ret i8 1
+}
+
+define dso_local void @dsolocal_default_def() {
+  ret void
+}
+
+define void @internal_default_def() {
+  ret void
+}
+
+define void @local_b() {
+  ret void
+}
+
+; CHECK: define i8 @local_a() {
+
+; Even though a jump table entry is generated, the call goes directly
+; to the function
+; CHECK-NEXT:   call void @external_decl()
+
+; External call with no CFI decl - no action
+; CHECK-NEXT:   call void @external_nodecl()
+
+; Internal function with default visibility gets routed through the jump table
+; as it may be overridden at run time.
+; CHECK-NEXT:   call void @internal_default_def()
+
+; Internal function with hidden visibility defined outside the module
+; generates a jump table entry and is renamed to *.cfi: route direct call
+; to the actual function, not jump table
+; CHECK-NEXT:   call void @internal_hidden_def.cfi()
+
+; dso_local function with default visibility can be short-circuited
+; CHECK-NEXT:   call void @dsolocal_default_def.cfi()
+
+; Local call - no action
+; CHECK-NEXT:   call void @local_b
+
+; CHECK-NEXT:   ret i8 1
+
+; CHECK: declare hidden void @internal_hidden_def.cfi()
+; CHECK: declare hidden void @external_decl.cfi_jt()

Added: llvm/trunk/test/Transforms/LowerTypeTests/cfi-direct-call1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/cfi-direct-call1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/cfi-direct-call1.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/cfi-direct-call1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,96 @@
+; RUN: opt -lowertypetests -S %s | FileCheck --check-prefix=FULL %s
+; RUN: opt -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%p/Inputs/cfi-direct-call1.yaml -S %s | FileCheck --check-prefix=THIN %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux"
+
+define hidden i32 @local_func1() !type !3 !type !4 {
+  ret i32 11
+}
+
+define hidden i32 @local_func2() !type !3 !type !4 {
+  ret i32 22
+}
+
+define hidden i32 @local_func3(i32 %i) local_unnamed_addr !type !5 !type !6 {
+entry:
+  %add = add nsw i32 %i, 44
+  ret i32 %add
+}
+
+declare !type !3 !type !4 extern_weak i32 @extern_weak()
+declare !type !3 !type !4 i32 @extern_decl()
+declare i1 @llvm.type.test(i8*, metadata)
+
+define hidden i32 @main(i32 %argc) {
+entry:
+  %cmp.i = icmp sgt i32 %argc, 1
+  %fptr1 = select i1 %cmp.i, i32 ()* @local_func1, i32 ()* @local_func2
+  %fptr2 = select i1 %cmp.i, i32 ()* @extern_weak, i32 ()* @extern_decl
+  %0 = bitcast i32 ()* %fptr1 to i8*
+  %1 = tail call i1 @llvm.type.test(i8* nonnull %0, metadata !"_ZTSFivE")
+
+  %call2 = tail call i32 %fptr1()
+  %2 = bitcast i32 ()* %fptr2 to i8*
+  %3 = tail call i1 @llvm.type.test(i8* %2, metadata !"_ZTSFivE")
+
+  %call4 = tail call i32 %fptr2()
+  %call5 = tail call i32 @extern_decl()
+  %call7 = tail call i32 @extern_weak()
+  %call6 = tail call i32 @local_func1()
+  %call8 = tail call i32 @local_func3(i32 4)
+  ret i32 12
+}
+
+!3 = !{i64 0, !"_ZTSFivE"}
+!4 = !{i64 0, !"_ZTSFivE.generalized"}
+!5 = !{i64 0, !"_ZTSFiiE"}
+!6 = !{i64 0, !"_ZTSFiiE.generalized"}
+
+; Make sure local_func1 and local_func2 have been renamed to <name>.cfi
+; FULL: define hidden i32 @local_func1.cfi()
+; FULL: define hidden i32 @local_func2.cfi()
+
+; There are no indirect calls of local_func3 type, it should not be renamed
+; FULL: define hidden i32 @local_func3(i32 %i)
+
+; Indirect references to local_func1 and local_func2 must go through jump table
+; FULL: %fptr1 = select i1 %cmp.i, i32 ()* @local_func1, i32 ()* @local_func2
+
+; Indirect references to extern_weak and extern_decl must go through jump table
+; FULL: %fptr2 = select i1 %cmp.i, i32 ()* select (i1 icmp ne (i32 ()* @extern_weak, i32 ()* null), i32 ()* bitcast ([8 x i8]* getelementptr inbounds ([4 x [8 x i8]], [4 x [8 x i8]]* bitcast (void ()* @.cfi.jumptable to [4 x [8 x i8]]*), i64 0, i64 2) to i32 ()*), i32 ()* null), i32 ()* bitcast ([8 x i8]* getelementptr inbounds ([4 x [8 x i8]], [4 x [8 x i8]]* bitcast (void ()* @.cfi.jumptable to [4 x [8 x i8]]*), i64 0, i64 3) to i32 ()*)
+
+; Direct calls to extern_weak and extern_decl should go to original names
+; FULL: %call5 = tail call i32 @extern_decl()
+; FULL: %call7 = tail call i32 @extern_weak()
+
+; Direct call to local_func1 should go to the renamed version
+; FULL: %call6 = tail call i32 @local_func1.cfi()
+
+; local_func3 hasn't been renamed, direct call should go to the original name
+; FULL: %call8 = tail call i32 @local_func3(i32 4)
+
+; Check which jump table entries are created
+; FULL: define private void @.cfi.jumptable(){{.*}}
+; FULL-NEXT: entry:
+; FULL-NEXT: call void asm{{.*}}local_func1.cfi{{.*}}local_func2.cfi{{.*}}extern_weak{{.*}}extern_decl
+
+; Make sure all local functions have been renamed to <name>.cfi
+; THIN: define hidden i32 @local_func1.cfi()
+; THIN: define hidden i32 @local_func2.cfi()
+; THIN: define hidden i32 @local_func3.cfi(i32 %i){{.*}}
+
+; Indirect references to local_func1 and local_func2 must go through jump table
+; THIN: %fptr1 = select i1 %cmp.i, i32 ()* @local_func1, i32 ()* @local_func2
+
+; Indirect references to extern_weak and extern_decl must go through jump table
+; THIN: %fptr2 = select i1 %cmp.i, i32 ()* select (i1 icmp ne (i32 ()* @extern_weak, i32 ()* null), i32 ()* @extern_weak.cfi_jt, i32 ()* null), i32 ()* @extern_decl.cfi_jt
+
+; Direct calls to extern_weak and extern_decl should go to original names
+; THIN: %call5 = tail call i32 @extern_decl()
+; THIN: %call7 = tail call i32 @extern_weak()
+
+; Direct call to local_func1 should go to the renamed version
+; THIN: %call6 = tail call i32 @local_func1.cfi()
+; THIN: %call8 = tail call i32 @local_func3.cfi(i32 4)
+

Added: llvm/trunk/test/Transforms/LowerTypeTests/constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/constant.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/constant.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/constant.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,33 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+; RUN: opt -S -passes=lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+@a = constant i32 1, !type !0
+@b = constant [2 x i32] [i32 2, i32 3], !type !1
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 4, !"typeid1"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: @foo(
+define i1 @foo() {
+  ; CHECK: ret i1 true
+  %x = call i1 @llvm.type.test(i8* bitcast (i32* @a to i8*), metadata !"typeid1")
+  ret i1 %x
+}
+
+; CHECK: @bar(
+define i1 @bar() {
+  ; CHECK: ret i1 true
+  %x = call i1 @llvm.type.test(i8* bitcast (i32* getelementptr ([2 x i32], [2 x i32]* @b, i32 0, i32 1) to i8*), metadata !"typeid1")
+  ret i1 %x
+}
+
+; CHECK: @baz(
+define i1 @baz() {
+  ; CHECK-NOT: ret i1 true
+  %x = call i1 @llvm.type.test(i8* bitcast (i32* getelementptr ([2 x i32], [2 x i32]* @b, i32 0, i32 0) to i8*), metadata !"typeid1")
+  ret i1 %x
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-alias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-alias.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-alias.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-alias.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt -S %s -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/exported-funcs.yaml | FileCheck %s
+;
+; CHECK: @alias1 = weak alias void (), void ()* @external_addrtaken
+; CHECK: @alias2 = hidden alias void (), void ()* @external_addrtaken
+; CHECK-NOT: @alias3 = alias
+; CHECK-NOT: @not_present
+
+target triple = "x86_64-unknown-linux"
+
+!cfi.functions = !{!0, !2, !3}
+!aliases = !{!4, !5, !6}
+
+!0 = !{!"external_addrtaken", i8 0, !1}
+!1 = !{i64 0, !"typeid1"}
+!2 = !{!"alias1", i8 1, !1}
+; alias2 not included here, this could happen if the only reference to alias2
+; is in a module compiled without cfi-icall
+!3 = !{!"alias3", i8 1, !1}
+!4 = !{!"alias1", !"external_addrtaken", i8 0, i8 1}
+!5 = !{!"alias2", !"external_addrtaken", i8 1, i8 0}
+!6 = !{!"alias3", !"not_present", i8 0, i8 0}

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-allones.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-allones.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-allones.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-allones.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,182 @@
+; RUN: opt -mtriple=x86_64-unknown-linux -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck --check-prefixes=CHECK,X86 %s
+; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-X86 %s < %t
+
+; RUN: opt -mtriple=aarch64-unknown-linux -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck --check-prefixes=CHECK,ARM %s
+; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-ARM %s < %t
+
+@foo = constant [2048 x i8] zeroinitializer, !type !0, !type !1, !type !2, !type !3, !type !4, !type !5, !type !6, !type !7, !type !8, !type !9, !type !10, !type !11, !type !12, !type !13, !type !14, !type !15, !type !16, !type !17, !type !18, !type !19, !type !20, !type !21, !type !22, !type !23, !type !24, !type !25, !type !26, !type !27, !type !28, !type !29, !type !30, !type !31, !type !32, !type !33, !type !34, !type !35, !type !36, !type !37, !type !38, !type !39, !type !40, !type !41, !type !42, !type !43, !type !44, !type !45, !type !46, !type !47, !type !48, !type !49, !type !50, !type !51, !type !52, !type !53, !type !54, !type !55, !type !56, !type !57, !type !58, !type !59, !type !60, !type !61, !type !62, !type !63, !type !64, !type !65, !type !66, !type !67, !type !68, !type !69, !type !70, !type !71, !type !72, !type !73, !type !74, !type !75, !type !76, !type !77, !type !78, !type !79, !type !80, !type !81, !type !82, !type !83, !type !84, !type !85, !type !86, !type !87, !type !88, !type !89, !type !90, !type !91, !type !92, !type !93, !type !94, !type !95, !type !96, !type !97, !type !98, !type !99, !type !100, !type !101, !type !102, !type !103, !type !104, !type !105, !type !106, !type !107, !type !108, !type !109, !type !110, !type !111, !type !112, !type !113, !type !114, !type !115, !type !116, !type !117, !type !118, !type !119, !type !120, !type !121, !type !122, !type !123, !type !124, !type !125, !type !126, !type !127, !type !128, !type !129, !type !130
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 2, !"typeid1"}
+
+!2 = !{i32 4, !"typeid2"}
+!3 = !{i32 8, !"typeid2"}
+!4 = !{i32 12, !"typeid2"}
+!5 = !{i32 16, !"typeid2"}
+!6 = !{i32 20, !"typeid2"}
+!7 = !{i32 24, !"typeid2"}
+!8 = !{i32 28, !"typeid2"}
+!9 = !{i32 32, !"typeid2"}
+!10 = !{i32 36, !"typeid2"}
+!11 = !{i32 40, !"typeid2"}
+!12 = !{i32 44, !"typeid2"}
+!13 = !{i32 48, !"typeid2"}
+!14 = !{i32 52, !"typeid2"}
+!15 = !{i32 56, !"typeid2"}
+!16 = !{i32 60, !"typeid2"}
+!17 = !{i32 64, !"typeid2"}
+!18 = !{i32 68, !"typeid2"}
+!19 = !{i32 72, !"typeid2"}
+!20 = !{i32 76, !"typeid2"}
+!21 = !{i32 80, !"typeid2"}
+!22 = !{i32 84, !"typeid2"}
+!23 = !{i32 88, !"typeid2"}
+!24 = !{i32 92, !"typeid2"}
+!25 = !{i32 96, !"typeid2"}
+!26 = !{i32 100, !"typeid2"}
+!27 = !{i32 104, !"typeid2"}
+!28 = !{i32 108, !"typeid2"}
+!29 = !{i32 112, !"typeid2"}
+!30 = !{i32 116, !"typeid2"}
+!31 = !{i32 120, !"typeid2"}
+!32 = !{i32 124, !"typeid2"}
+!33 = !{i32 128, !"typeid2"}
+!34 = !{i32 132, !"typeid2"}
+!35 = !{i32 136, !"typeid2"}
+!36 = !{i32 140, !"typeid2"}
+!37 = !{i32 144, !"typeid2"}
+!38 = !{i32 148, !"typeid2"}
+!39 = !{i32 152, !"typeid2"}
+!40 = !{i32 156, !"typeid2"}
+!41 = !{i32 160, !"typeid2"}
+!42 = !{i32 164, !"typeid2"}
+!43 = !{i32 168, !"typeid2"}
+!44 = !{i32 172, !"typeid2"}
+!45 = !{i32 176, !"typeid2"}
+!46 = !{i32 180, !"typeid2"}
+!47 = !{i32 184, !"typeid2"}
+!48 = !{i32 188, !"typeid2"}
+!49 = !{i32 192, !"typeid2"}
+!50 = !{i32 196, !"typeid2"}
+!51 = !{i32 200, !"typeid2"}
+!52 = !{i32 204, !"typeid2"}
+!53 = !{i32 208, !"typeid2"}
+!54 = !{i32 212, !"typeid2"}
+!55 = !{i32 216, !"typeid2"}
+!56 = !{i32 220, !"typeid2"}
+!57 = !{i32 224, !"typeid2"}
+!58 = !{i32 228, !"typeid2"}
+!59 = !{i32 232, !"typeid2"}
+!60 = !{i32 236, !"typeid2"}
+!61 = !{i32 240, !"typeid2"}
+!62 = !{i32 244, !"typeid2"}
+!63 = !{i32 248, !"typeid2"}
+!64 = !{i32 252, !"typeid2"}
+!65 = !{i32 256, !"typeid2"}
+!66 = !{i32 260, !"typeid2"}
+!67 = !{i32 264, !"typeid2"}
+!68 = !{i32 268, !"typeid2"}
+!69 = !{i32 272, !"typeid2"}
+!70 = !{i32 276, !"typeid2"}
+!71 = !{i32 280, !"typeid2"}
+!72 = !{i32 284, !"typeid2"}
+!73 = !{i32 288, !"typeid2"}
+!74 = !{i32 292, !"typeid2"}
+!75 = !{i32 296, !"typeid2"}
+!76 = !{i32 300, !"typeid2"}
+!77 = !{i32 304, !"typeid2"}
+!78 = !{i32 308, !"typeid2"}
+!79 = !{i32 312, !"typeid2"}
+!80 = !{i32 316, !"typeid2"}
+!81 = !{i32 320, !"typeid2"}
+!82 = !{i32 324, !"typeid2"}
+!83 = !{i32 328, !"typeid2"}
+!84 = !{i32 332, !"typeid2"}
+!85 = !{i32 336, !"typeid2"}
+!86 = !{i32 340, !"typeid2"}
+!87 = !{i32 344, !"typeid2"}
+!88 = !{i32 348, !"typeid2"}
+!89 = !{i32 352, !"typeid2"}
+!90 = !{i32 356, !"typeid2"}
+!91 = !{i32 360, !"typeid2"}
+!92 = !{i32 364, !"typeid2"}
+!93 = !{i32 368, !"typeid2"}
+!94 = !{i32 372, !"typeid2"}
+!95 = !{i32 376, !"typeid2"}
+!96 = !{i32 380, !"typeid2"}
+!97 = !{i32 384, !"typeid2"}
+!98 = !{i32 388, !"typeid2"}
+!99 = !{i32 392, !"typeid2"}
+!100 = !{i32 396, !"typeid2"}
+!101 = !{i32 400, !"typeid2"}
+!102 = !{i32 404, !"typeid2"}
+!103 = !{i32 408, !"typeid2"}
+!104 = !{i32 412, !"typeid2"}
+!105 = !{i32 416, !"typeid2"}
+!106 = !{i32 420, !"typeid2"}
+!107 = !{i32 424, !"typeid2"}
+!108 = !{i32 428, !"typeid2"}
+!109 = !{i32 432, !"typeid2"}
+!110 = !{i32 436, !"typeid2"}
+!111 = !{i32 440, !"typeid2"}
+!112 = !{i32 444, !"typeid2"}
+!113 = !{i32 448, !"typeid2"}
+!114 = !{i32 452, !"typeid2"}
+!115 = !{i32 456, !"typeid2"}
+!116 = !{i32 460, !"typeid2"}
+!117 = !{i32 464, !"typeid2"}
+!118 = !{i32 468, !"typeid2"}
+!119 = !{i32 472, !"typeid2"}
+!120 = !{i32 476, !"typeid2"}
+!121 = !{i32 480, !"typeid2"}
+!122 = !{i32 484, !"typeid2"}
+!123 = !{i32 488, !"typeid2"}
+!124 = !{i32 492, !"typeid2"}
+!125 = !{i32 496, !"typeid2"}
+!126 = !{i32 500, !"typeid2"}
+!127 = !{i32 504, !"typeid2"}
+!128 = !{i32 508, !"typeid2"}
+!129 = !{i32 512, !"typeid2"}
+!130 = !{i32 516, !"typeid2"}
+
+; CHECK: [[G:@[0-9]+]] = private constant { [2048 x i8] } zeroinitializer
+
+; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, getelementptr inbounds ({ [2048 x i8] }, { [2048 x i8] }* [[G]], i32 0, i32 0, i32 0)
+; X86: @__typeid_typeid1_align = hidden alias i8, inttoptr (i8 1 to i8*)
+; X86: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 1 to i8*)
+
+; CHECK: @__typeid_typeid2_global_addr = hidden alias i8, getelementptr inbounds ({ [2048 x i8] }, { [2048 x i8] }* [[G]], i32 0, i32 0, i64 4)
+; X86: @__typeid_typeid2_align = hidden alias i8, inttoptr (i8 2 to i8*)
+; X86: @__typeid_typeid2_size_m1 = hidden alias i8, inttoptr (i64 128 to i8*)
+
+; ARM-NOT: alias {{.*}} inttoptr
+
+; CHECK: @foo = alias [2048 x i8], getelementptr inbounds ({ [2048 x i8] }, { [2048 x i8] }* [[G]], i32 0, i32 0)
+
+; SUMMARY:      TypeIdMap:
+; SUMMARY-NEXT:   typeid1:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            AllOnes
+; SUMMARY-NEXT:       SizeM1BitWidth:  7
+; SUMMARY-X86-NEXT:   AlignLog2:       0
+; SUMMARY-X86-NEXT:   SizeM1:          0
+; SUMMARY-X86-NEXT:   BitMask:         0
+; SUMMARY-X86-NEXT:   InlineBits:      0
+; SUMMARY-ARM-NEXT:   AlignLog2:       1
+; SUMMARY-ARM-NEXT:   SizeM1:          1
+; SUMMARY-ARM-NEXT:   BitMask:         0
+; SUMMARY-ARM-NEXT:   InlineBits:      0
+; SUMMARY-NEXT:     WPDRes:
+; SUMMARY-NEXT:   typeid2:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            AllOnes
+; SUMMARY-NEXT:       SizeM1BitWidth:  32
+; SUMMARY-X86-NEXT:   AlignLog2:       0
+; SUMMARY-X86-NEXT:   SizeM1:          0
+; SUMMARY-X86-NEXT:   BitMask:         0
+; SUMMARY-X86-NEXT:   InlineBits:      0
+; SUMMARY-ARM-NEXT:   AlignLog2:       2
+; SUMMARY-ARM-NEXT:   SizeM1:          128
+; SUMMARY-ARM-NEXT:   BitMask:         0
+; SUMMARY-ARM-NEXT:   InlineBits:      0
+; SUMMARY-NEXT:     WPDRes:

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-bytearray.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-bytearray.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-bytearray.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-bytearray.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; RUN: opt -mtriple=x86_64-unknown-linux -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck --check-prefixes=CHECK,X86 %s
+; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-X86 %s < %t
+
+; RUN: opt -mtriple=aarch64-unknown-linux -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck --check-prefixes=CHECK,ARM %s
+; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-ARM %s < %t
+
+@foo = constant [2048 x i8] zeroinitializer, !type !0, !type !1, !type !2, !type !3
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 130, !"typeid1"}
+!2 = !{i32 4, !"typeid2"}
+!3 = !{i32 1032, !"typeid2"}
+
+; CHECK: [[G:@[0-9]+]] = private constant { [2048 x i8] } zeroinitializer
+; CHECK: [[B:@[0-9]+]] = private constant [258 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01"
+
+; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, getelementptr inbounds ({ [2048 x i8] }, { [2048 x i8] }* [[G]], i32 0, i32 0, i32 0)
+; X86: @__typeid_typeid1_align = hidden alias i8, inttoptr (i8 1 to i8*)
+; X86: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 65 to i8*)
+; CHECK: @__typeid_typeid1_byte_array = hidden alias i8, i8* @bits.1
+; X86: @__typeid_typeid1_bit_mask = hidden alias i8, inttoptr (i8 2 to i8*)
+
+; CHECK: @__typeid_typeid2_global_addr = hidden alias i8, getelementptr inbounds ({ [2048 x i8] }, { [2048 x i8] }* [[G]], i32 0, i32 0, i64 4)
+; X86: @__typeid_typeid2_align = hidden alias i8, inttoptr (i8 2 to i8*)
+; X86: @__typeid_typeid2_size_m1 = hidden alias i8, inttoptr (i64 257 to i8*)
+; CHECK: @__typeid_typeid2_byte_array = hidden alias i8, i8* @bits
+; X86: @__typeid_typeid2_bit_mask = hidden alias i8, inttoptr (i8 1 to i8*)
+
+; ARM-NOT: alias {{.*}} inttoptr
+
+; CHECK: @foo = alias [2048 x i8], getelementptr inbounds ({ [2048 x i8] }, { [2048 x i8] }* [[G]], i32 0, i32 0)
+; CHECK: @bits = private alias i8, getelementptr inbounds ([258 x i8], [258 x i8]* [[B]], i64 0, i64 0)
+; CHECK: @bits.1 = private alias i8, getelementptr inbounds ([258 x i8], [258 x i8]* [[B]], i64 0, i64 0)
+
+; SUMMARY:      TypeIdMap:
+; SUMMARY-NEXT:   typeid1:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            ByteArray
+; SUMMARY-NEXT:       SizeM1BitWidth:  7
+; SUMMARY-X86-NEXT:   AlignLog2:       0
+; SUMMARY-X86-NEXT:   SizeM1:          0
+; SUMMARY-X86-NEXT:   BitMask:         0
+; SUMMARY-X86-NEXT:   InlineBits:      0
+; SUMMARY-ARM-NEXT:   AlignLog2:       1
+; SUMMARY-ARM-NEXT:   SizeM1:          65
+; SUMMARY-ARM-NEXT:   BitMask:         2
+; SUMMARY-ARM-NEXT:   InlineBits:      0
+; SUMMARY-NEXT:     WPDRes:
+; SUMMARY-NEXT:   typeid2:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            ByteArray
+; SUMMARY-NEXT:       SizeM1BitWidth:  32
+; SUMMARY-X86-NEXT:   AlignLog2:       0
+; SUMMARY-X86-NEXT:   SizeM1:          0
+; SUMMARY-X86-NEXT:   BitMask:         0
+; SUMMARY-X86-NEXT:   InlineBits:      0
+; SUMMARY-ARM-NEXT:   AlignLog2:       2
+; SUMMARY-ARM-NEXT:   SizeM1:          257
+; SUMMARY-ARM-NEXT:   BitMask:         1
+; SUMMARY-ARM-NEXT:   InlineBits:      0
+; SUMMARY-NEXT:     WPDRes:

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-cross-dso-cfi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-cross-dso-cfi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-cross-dso-cfi.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-cross-dso-cfi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; Test that external functions have jumptable entries emitted even if they are
+; not address-taken when Cross-DSO CFI is used, but not otherwise.
+
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/exported-funcs.yaml < %s | FileCheck --check-prefixes=CHECK,CROSSDSO %s
+; RUN: cat %s | grep -v "llvm.module.flags" | opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/exported-funcs.yaml | FileCheck --check-prefixes=CHECK,NORMAL %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;;; Defined in the ThinLTO portion of the build (e.g. the summary)
+; CROSSDSO: declare !type !1 !type !2 hidden void @external.cfi()
+; NORMAL: declare !type !1 !type !2 void @external()
+declare !type !1 !type !2 void @external()
+
+; Don't emit jumptable entries for external declarations/non-external definitions
+; CHECK-NOT: @external2
+; CHECK-NOT: @internal
+
+;;; Defined in the regular LTO portion of the build
+; CROSSDSO: define hidden void @regularlto_external.cfi()
+; NORMAL: define void @regularlto_external()
+define void @regularlto_external() !type !1 !type !2 {
+  ret void
+}
+
+; CHECK: define internal void @regularlto_internal()
+define internal void @regularlto_internal() !type !1 !type !2 {
+  ret void
+}
+
+!cfi.functions = !{!0, !3, !4}
+!llvm.module.flags = !{!5}
+
+!0 = !{!"external", i8 0, !1, !2}
+!1 = !{i64 0, !"typeid1"}
+!2 = !{i64 0, i64 1234}
+!3 = !{!"external2", i8 1, !1, !2}
+!4 = !{!"internal", i8 0, !1, !2}
+!5 = !{i32 4, !"Cross-DSO CFI", i32 1}

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-dead.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-dead.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-dead.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-dead.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,14 @@
+; The only use of "typeid1" is in a dead function. Export nothing.
+
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-dead.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s
+; RUN: FileCheck --check-prefix=SUMMARY %s < %t
+
+@foo = constant i32 42, !type !0
+
+!0 = !{i32 0, !"typeid1"}
+
+; CHECK-NOT: @__typeid_typeid1_global_addr =
+
+; SUMMARY:      TypeIdMap:
+; SUMMARY-NEXT: WithGlobalValueDeadStripping: true
+; SUMMARY-NEXT: ...

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-icall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-icall.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-icall.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-icall.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,91 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/export-icall.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s
+; RUN: FileCheck --check-prefix=SUMMARY %s < %t
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @h(i8 %x) !type !2 {
+  ret void
+}
+
+declare !type !8 void @f(i32 %x)
+define available_externally void @f2(i32 %x) !type !8 {
+  ret void
+}
+define void @f3(i32 %x) !type !8 {
+  ret void
+}
+
+!cfi.functions = !{!0, !1, !3, !9, !10, !4, !5, !6}
+
+; declaration of @h with a different type is ignored
+!0 = !{!"h", i8 1, !7}
+
+; extern_weak declaration of @h with a different type is ignored as well
+!1 = !{!"h", i8 2, !8}
+!2 = !{i64 0, !"typeid1"}
+
+; definitions of @f and @f2 replace types on the IR declarations above
+!3 = !{!"f", i8 0, !2}
+!9 = !{!"f2", i8 0, !2}
+!10 = !{!"f3", i8 0, !2}
+!4 = !{!"external", i8 1, !2}
+!5 = !{!"external_weak", i8 2, !2}
+!6 = !{!"g", i8 0, !7}
+!7 = !{i64 0, !"typeid2"}
+!8 = !{i64 0, !"typeid3"}
+
+
+; CHECK-DAG: @__typeid_typeid1_global_addr = hidden alias i8, bitcast (void ()* [[JT1:.*]] to i8*)
+; CHECK-DAG: @__typeid_typeid1_align = hidden alias i8, inttoptr (i8 3 to i8*)
+; CHECK-DAG: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 4 to i8*)
+
+; CHECK-DAG: @h                    = alias void (i8), bitcast (void ()* [[JT1]] to void (i8)*)
+; CHECK-DAG: @f                    = alias void (i32), {{.*}}getelementptr {{.*}}void ()* [[JT1]]
+; CHECK-DAG: @f2                   = alias void (i32), {{.*}}getelementptr {{.*}}void ()* [[JT1]]
+; CHECK-DAG: @external.cfi_jt      = hidden alias void (), {{.*}}getelementptr {{.*}}void ()* [[JT1]]
+; CHECK-DAG: @external_weak.cfi_jt = hidden alias void (), {{.*}}getelementptr {{.*}}void ()* [[JT1]]
+
+; CHECK-DAG: @__typeid_typeid2_global_addr = hidden alias i8, bitcast (void ()* [[JT2:.*]] to i8*)
+
+; CHECK-DAG: @g                    = alias void (), void ()* [[JT2]]
+
+; CHECK-DAG: define hidden void @h.cfi(i8 {{.*}}) !type !{{.*}}
+; CHECK-DAG: declare !type !{{.*}} void @external()
+; CHECK-DAG: declare !type !{{.*}} void @external_weak()
+; CHECK-DAG: declare !type !{{.*}} void @f.cfi(i32)
+; CHECK-DAG: declare !type !{{.*}} void @f2.cfi(i32)
+; CHECK-DAG: define void @f3(i32 {{.*}}) !type !3
+; CHECK-DAG: !3 = !{i64 0, !"typeid3"}
+; CHECK-DAG: declare !type !{{.*}} void @g.cfi()
+
+
+; SUMMARY:      TypeIdMap:
+; SUMMARY-NEXT:   typeid1:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            AllOnes
+; SUMMARY-NEXT:       SizeM1BitWidth:  7
+; SUMMARY-NEXT:       AlignLog2:       0
+; SUMMARY-NEXT:       SizeM1:          0
+; SUMMARY-NEXT:       BitMask:         0
+; SUMMARY-NEXT:       InlineBits:      0
+; SUMMARY-NEXT:     WPDRes:
+; SUMMARY-NEXT:   typeid2:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            Single
+; SUMMARY-NEXT:       SizeM1BitWidth:  0
+; SUMMARY-NEXT:       AlignLog2:       0
+; SUMMARY-NEXT:       SizeM1:          0
+; SUMMARY-NEXT:       BitMask:         0
+; SUMMARY-NEXT:       InlineBits:      0
+; SUMMARY-NEXT:     WPDRes:
+
+; SUMMARY:      CfiFunctionDefs:
+; SUMMARY-NEXT:   - f
+; SUMMARY-NEXT:   - f2
+; SUMMARY-NEXT:   - g
+; SUMMARY-NEXT:   - h
+; SUMMARY-NEXT: CfiFunctionDecls:
+; SUMMARY-NEXT:   - external
+; SUMMARY-NEXT:   - external_weak
+; SUMMARY-NEXT: ...

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-inline.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-inline.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-inline.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,54 @@
+; RUN: opt -mtriple=x86_64-unknown-linux -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck --check-prefixes=CHECK,X86 %s
+; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-X86 %s < %t
+
+; RUN: opt -mtriple=aarch64-unknown-linux -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck --check-prefixes=CHECK,ARM %s
+; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-ARM %s < %t
+
+@foo = constant [2048 x i8] zeroinitializer, !type !0, !type !1, !type !2, !type !3
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 6, !"typeid1"}
+!2 = !{i32 4, !"typeid2"}
+!3 = !{i32 136, !"typeid2"}
+
+; CHECK: [[G:@[0-9]+]] = private constant { [2048 x i8] } zeroinitializer
+
+; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, getelementptr inbounds ({ [2048 x i8] }, { [2048 x i8] }* [[G]], i32 0, i32 0, i32 0)
+; CHECK-X86: @__typeid_typeid1_align = hidden alias i8, inttoptr (i8 1 to i8*)
+; CHECK-X86: @__typeid_typeid1_size_m1 = hidden alias i8, inttoptr (i64 3 to i8*)
+; CHECK-X86: @__typeid_typeid1_inline_bits = hidden alias i8, inttoptr (i32 9 to i8*)
+
+; CHECK: @__typeid_typeid2_global_addr = hidden alias i8, getelementptr inbounds ({ [2048 x i8] }, { [2048 x i8] }* [[G]], i32 0, i32 0, i64 4)
+; CHECK-X86: @__typeid_typeid2_align = hidden alias i8, inttoptr (i8 2 to i8*)
+; CHECK-X86: @__typeid_typeid2_size_m1 = hidden alias i8, inttoptr (i64 33 to i8*)
+; CHECK-X86: @__typeid_typeid2_inline_bits = hidden alias i8, inttoptr (i64 8589934593 to i8*)
+
+; CHECK: @foo = alias [2048 x i8], getelementptr inbounds ({ [2048 x i8] }, { [2048 x i8] }* [[G]], i32 0, i32 0)
+
+; SUMMARY:      TypeIdMap:
+; SUMMARY-NEXT:   typeid1:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            Inline
+; SUMMARY-NEXT:       SizeM1BitWidth:  5
+; SUMMARY-X86-NEXT:   AlignLog2:       0
+; SUMMARY-X86-NEXT:   SizeM1:          0
+; SUMMARY-X86-NEXT:   BitMask:         0
+; SUMMARY-X86-NEXT:   InlineBits:      0
+; SUMMARY-ARM-NEXT:   AlignLog2:       1
+; SUMMARY-ARM-NEXT:   SizeM1:          3
+; SUMMARY-ARM-NEXT:   BitMask:         0
+; SUMMARY-ARM-NEXT:   InlineBits:      9
+; SUMMARY-NEXT:     WPDRes:
+; SUMMARY-NEXT:   typeid2:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            Inline
+; SUMMARY-NEXT:       SizeM1BitWidth:  6
+; SUMMARY-X86-NEXT:   AlignLog2:       0
+; SUMMARY-X86-NEXT:   SizeM1:          0
+; SUMMARY-X86-NEXT:   BitMask:         0
+; SUMMARY-X86-NEXT:   InlineBits:      0
+; SUMMARY-ARM-NEXT:   AlignLog2:       2
+; SUMMARY-ARM-NEXT:   SizeM1:          33
+; SUMMARY-ARM-NEXT:   BitMask:         0
+; SUMMARY-ARM-NEXT:   InlineBits:      8589934593
+; SUMMARY-NEXT:     WPDRes:

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-nothing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-nothing.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-nothing.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-nothing.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,8 @@
+; RUN: opt -lowertypetests -lowertypetests-summary-action=export -lowertypetests-write-summary=%t -o /dev/null %s
+; RUN: FileCheck %s < %t
+
+; CHECK: ---
+; CHECK-NEXT: GlobalValueMap:
+; CHECK-NEXT: TypeIdMap:
+; CHECK-NEXT: WithGlobalValueDeadStripping: false
+; CHECK-NEXT: ...

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-single.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-single.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-single.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-single.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,17 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s
+; RUN: FileCheck --check-prefix=SUMMARY %s < %t
+
+@foo = constant i32 42, !type !0
+
+!0 = !{i32 0, !"typeid1"}
+
+; CHECK: [[G:@[0-9]+]] = private constant { i32 } { i32 42 }
+
+; CHECK: @__typeid_typeid1_global_addr = hidden alias i8, bitcast ({ i32 }* [[G]] to i8*)
+; CHECK: @foo = alias i32, getelementptr inbounds ({ i32 }, { i32 }* [[G]], i32 0, i32 0)
+
+; SUMMARY:      TypeIdMap:
+; SUMMARY-NEXT:   typeid1:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            Single
+; SUMMARY-NEXT:       SizeM1BitWidth:  0

Added: llvm/trunk/test/Transforms/LowerTypeTests/export-symver.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/export-symver.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/export-symver.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/export-symver.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; RUN: opt -S %s -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/exported-funcs.yaml | FileCheck %s
+;
+; CHECK: module asm ".symver external_addrtaken, alias1"
+; CHECK-NOT: .symver external_addrtaken2
+; CHECK-NOT: .symver not_exported
+
+target triple = "x86_64-unknown-linux"
+
+!cfi.functions = !{!0, !1}
+!symvers = !{!3, !4}
+
+!0 = !{!"external_addrtaken", i8 0, !2}
+!1 = !{!"external_addrtaken2", i8 0, !2}
+!2 = !{i64 0, !"typeid1"}
+!3 = !{!"external_addrtaken", !"alias1"}
+!4 = !{!"not_exported", !"alias2"}

Added: llvm/trunk/test/Transforms/LowerTypeTests/external-global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/external-global.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/external-global.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/external-global.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,14 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-scei-ps4"
+
+; CHECK: @dipsy = external
+@dipsy = external constant i8, !type !0
+
+define void @tinkywinky() {
+  store i8* @dipsy, i8** undef
+  ret void
+}
+
+!0 = !{i64 16, !"teletubbies"}

Added: llvm/trunk/test/Transforms/LowerTypeTests/function-arm-thumb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/function-arm-thumb.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/function-arm-thumb.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/function-arm-thumb.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt -S -mtriple=arm-unknown-linux-gnu -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-typeid2.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+
+define void @f1() "target-features"="+thumb-mode" !type !0 {
+  ret void
+}
+
+define void @g1() "target-features"="-thumb-mode" !type !0 {
+  ret void
+}
+
+define void @f2() "target-features"="+thumb-mode" !type !1 {
+  ret void
+}
+
+define void @g2() "target-features"="-thumb-mode" !type !1 {
+  ret void
+}
+
+define void @h2() "target-features"="-thumb-mode" !type !1 {
+  ret void
+}
+
+declare void @takeaddr(void()*, void()*, void()*, void()*, void()*)
+define void @addrtaken() {
+  call void @takeaddr(void()* @f1, void()* @g1, void()* @f2, void()* @g2, void()* @h2)
+  ret void
+}
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 0, !"typeid2"}
+
+; CHECK: define private void {{.*}} #[[AT:.*]] align 4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:  call void asm sideeffect "b.w $0\0Ab.w $1\0A", "s,s"(void ()* @f1.cfi, void ()* @g1.cfi)
+; CHECK-NEXT:  unreachable
+; CHECK-NEXT: }
+
+; CHECK: define private void {{.*}} #[[AA:.*]] align 4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:  call void asm sideeffect "b $0\0Ab $1\0Ab $2\0A", "s,s,s"(void ()* @f2.cfi, void ()* @g2.cfi, void ()* @h2.cfi)
+; CHECK-NEXT:  unreachable
+; CHECK-NEXT: }
+
+; CHECK-DAG: attributes #[[AA]] = { naked nounwind "target-features"="-thumb-mode" }
+; CHECK-DAG: attributes #[[AT]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" }

Added: llvm/trunk/test/Transforms/LowerTypeTests/function-disjoint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/function-disjoint.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/function-disjoint.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/function-disjoint.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; RUN: opt -S -lowertypetests -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck --check-prefix=X64 %s
+; RUN: opt -S -lowertypetests -mtriple=wasm32-unknown-unknown < %s | FileCheck --check-prefix=WASM32 %s
+
+; Tests that we correctly handle bitsets with disjoint call target sets.
+
+target datalayout = "e-p:64:64"
+
+; X64: @f = alias void (), void ()* @[[JT0:.*]]
+; X64: @g = alias void (), void ()* @[[JT1:.*]]
+
+; WASM32: private constant [0 x i8] zeroinitializer
+@0 = private unnamed_addr constant [2 x void ()*] [void ()* @f, void ()* @g], align 16
+
+; X64: define hidden void @f.cfi()
+; WASM32: define void @f() !type !{{[0-9]+}} !wasm.index ![[I0:[0-9]+]]
+define void @f() !type !0 {
+  ret void
+}
+
+; X64: define hidden void @g.cfi()
+; WASM32: define void @g() !type !{{[0-9]+}} !wasm.index ![[I1:[0-9]+]]
+define void @g() !type !1 {
+  ret void
+}
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 0, !"typeid2"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+  ; X64: icmp eq i64 {{.*}}, ptrtoint (void ()* @[[JT0]] to i64)
+  ; WASM32: icmp eq i64 {{.*}}, ptrtoint (i8* getelementptr (i8, i8* null, i64 1) to i64)
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
+  ; X64: icmp eq i64 {{.*}}, ptrtoint (void ()* @[[JT1]] to i64)
+  ; WASM32: icmp eq i64 {{.*}}, mul (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 2)
+  %y = call i1 @llvm.type.test(i8* %p, metadata !"typeid2")
+  %z = add i1 %x, %y
+  ret i1 %z
+}
+
+; X64: define private void @[[JT0]]() #{{.*}} align 8 {
+; X64:   call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s"(void ()* @f.cfi)
+
+; X64: define private void @[[JT1]]() #{{.*}} align 8 {
+; X64:   call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s"(void ()* @g.cfi)
+
+; WASM32: ![[I0]] = !{i64 1}
+; WASM32: ![[I1]] = !{i64 2}

Added: llvm/trunk/test/Transforms/LowerTypeTests/function-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/function-ext.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/function-ext.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/function-ext.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; RUN: opt -S -lowertypetests -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck --check-prefixes=CHECK,X64 %s
+; RUN: opt -S -lowertypetests -mtriple=wasm32-unknown-unknown < %s | FileCheck --check-prefixes=CHECK,WASM32 %s
+
+; Tests that we correctly handle external references, including the case where
+; all functions in a bitset are external references.
+
+; WASM32: private constant [0 x i8] zeroinitializer
+
+; WASM32: declare !type !{{[0-9]+}} !wasm.index !{{[0-9]+}} void @foo1()
+declare !type !0 void @foo1()
+; WASM32: declare !type !{{[0-9]+}} void @foo2()
+declare !type !1 void @foo2()
+
+; CHECK-LABEL: @bar
+define i1 @bar(i8* %ptr) {
+  ; CHECK: %[[ICMP:[0-9]+]] = icmp eq
+  ; CHECK: ret i1 %[[ICMP]]
+  %p = call i1 @llvm.type.test(i8* %ptr, metadata !"type1")
+  ret i1 %p
+}
+
+; CHECK-LABEL: @baz
+define i1 @baz(i8* %ptr) {
+  ; CHECK: ret i1 false
+  %p = call i1 @llvm.type.test(i8* %ptr, metadata !"type2")
+  ret i1 %p
+}
+
+; CHECK-LABEL: @addrtaken
+define void()* @addrtaken() {
+  ; X64: ret void ()* @[[JT:.*]]
+  ret void()* @foo1
+}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+!0 = !{i64 0, !"type1"}
+!1 = !{i64 0, !"type2"}
+
+; X64: define private void @[[JT]]() #{{.*}} align {{.*}} {
+; X64:   call void asm sideeffect "jmp ${0:c}@plt\0Aint3\0Aint3\0Aint3\0A", "s"(void ()* @foo1)

Added: llvm/trunk/test/Transforms/LowerTypeTests/function-weak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/function-weak.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/function-weak.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/function-weak.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,66 @@
+; RUN: opt -S -lowertypetests -mtriple=i686-unknown-linux-gnu < %s | FileCheck --check-prefixes=CHECK,X86 %s
+; RUN: opt -S -lowertypetests -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck --check-prefixes=CHECK,X86 %s
+; RUN: opt -S -lowertypetests -mtriple=arm-unknown-linux-gnu < %s | FileCheck --check-prefixes=CHECK,ARM %s
+; RUN: opt -S -lowertypetests -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck --check-prefixes=CHECK,ARM %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: @x = global void ()* null, align 8
+@x = global void ()* @f, align 8
+
+; CHECK: @x2 = global void ()* null, align 8
+@x2 = global void ()* @f, align 8
+
+; CHECK: @x3 = internal global void ()* null, align 8
+@x3 = internal constant void ()* @f, align 8
+
+; f + addend
+; CHECK: @x4 = global void ()* null, align 8
+@x4 = global void ()* bitcast (i8* getelementptr (i8, i8* bitcast (void ()* @f to i8*), i64 42) to void ()*), align 8
+
+; aggregate initializer
+; CHECK: @s = global { void ()*, void ()*, i32 } zeroinitializer, align 8
+@s = global { void ()*, void ()*, i32 } { void ()* @f, void ()* @f, i32 42 }, align 8
+
+; CHECK:  @llvm.global_ctors = appending global {{.*}}{ i32 0, void ()* @__cfi_global_var_init
+
+; CHECK: declare !type !0 extern_weak void @f()
+declare !type !0 extern_weak void @f()
+
+; CHECK: define zeroext i1 @check_f()
+define zeroext i1 @check_f() {
+entry:
+; CHECK: ret i1 icmp ne (void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT:.*]], void ()* null), void ()* null)
+  ret i1 icmp ne (void ()* @f, void ()* null)
+}
+
+; CHECK: define void @call_f() {
+define void @call_f() {
+entry:
+; CHECK: call void @f()
+  call void @f()
+  ret void
+}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
+  ret i1 %x
+}
+
+; X86: define private void @[[JT]]() #{{.*}} align 8 {
+; ARM: define private void @[[JT]]() #{{.*}} align 4 {
+
+; CHECK: define internal void @__cfi_global_var_init() section ".text.startup" {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store { void ()*, void ()*, i32 } { void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), i32 42 }, { void ()*, void ()*, i32 }* @s, align 8
+; CHECK-NEXT: store void ()* bitcast (i8* getelementptr (i8, i8* bitcast (void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null) to i8*), i64 42) to void ()*), void ()** @x4, align 8
+; CHECK-NEXT: store void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()** @x3, align 8
+; CHECK-NEXT: store void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()** @x2, align 8
+; CHECK-NEXT: store void ()* select (i1 icmp ne (void ()* @f, void ()* null), void ()* @[[JT]], void ()* null), void ()** @x, align 8
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+!0 = !{i32 0, !"typeid1"}

Added: llvm/trunk/test/Transforms/LowerTypeTests/function.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/function.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/function.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/function.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,79 @@
+; RUN: opt -S -lowertypetests -mtriple=i686-unknown-linux-gnu < %s | FileCheck --check-prefixes=X86,X86-LINUX,NATIVE %s
+; RUN: opt -S -lowertypetests -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck --check-prefixes=X86,X86-LINUX,NATIVE %s
+; RUN: opt -S -lowertypetests -mtriple=i686-pc-win32 < %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE %s
+; RUN: opt -S -lowertypetests -mtriple=x86_64-pc-win32 < %s | FileCheck --check-prefixes=X86,X86-WIN32,NATIVE %s
+; RUN: opt -S -lowertypetests -mtriple=arm-unknown-linux-gnu < %s | FileCheck --check-prefixes=ARM,NATIVE %s
+; RUN: opt -S -lowertypetests -mtriple=thumb-unknown-linux-gnu < %s | FileCheck --check-prefixes=THUMB,NATIVE %s
+; RUN: opt -S -lowertypetests -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck --check-prefixes=ARM,NATIVE %s
+; RUN: opt -S -lowertypetests -mtriple=wasm32-unknown-unknown < %s | FileCheck --check-prefix=WASM32 %s
+
+; Tests that we correctly handle bitsets containing 2 or more functions.
+
+target datalayout = "e-p:64:64"
+
+
+; NATIVE: @0 = private unnamed_addr constant [2 x void (...)*] [void (...)* bitcast (void ()* @f to void (...)*), void (...)* bitcast (void ()* @g to void (...)*)], align 16
+@0 = private unnamed_addr constant [2 x void (...)*] [void (...)* bitcast (void ()* @f to void (...)*), void (...)* bitcast (void ()* @g to void (...)*)], align 16
+
+; NATIVE: private constant [0 x i8] zeroinitializer
+; WASM32: private constant [0 x i8] zeroinitializer
+
+; NATIVE: @f = alias void (), void ()* @[[JT:.*]]
+
+; X86: @g = internal alias void (), bitcast ([8 x i8]* getelementptr inbounds ([2 x [8 x i8]], [2 x [8 x i8]]* bitcast (void ()* @[[JT]] to [2 x [8 x i8]]*), i64 0, i64 1) to void ()*)
+; ARM: @g = internal alias void (), bitcast ([4 x i8]* getelementptr inbounds ([2 x [4 x i8]], [2 x [4 x i8]]* bitcast (void ()* @[[JT]] to [2 x [4 x i8]]*), i64 0, i64 1) to void ()*)
+; THUMB: @g = internal alias void (), bitcast ([4 x i8]* getelementptr inbounds ([2 x [4 x i8]], [2 x [4 x i8]]* bitcast (void ()* @[[JT]] to [2 x [4 x i8]]*), i64 0, i64 1) to void ()*)
+
+; NATIVE: define hidden void @f.cfi()
+; WASM32: define void @f() !type !{{[0-9]+}} !wasm.index ![[I0:[0-9]+]]
+define void @f() !type !0 {
+  ret void
+}
+
+; NATIVE: define internal void @g.cfi()
+; WASM32: define internal void @g() !type !{{[0-9]+}} !wasm.index ![[I1:[0-9]+]]
+define internal void @g() !type !0 {
+  ret void
+}
+
+!0 = !{i32 0, !"typeid1"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+  ; NATIVE: sub i64 {{.*}}, ptrtoint (void ()* @[[JT]] to i64)
+  ; WASM32: sub i64 {{.*}}, ptrtoint (i8* getelementptr (i8, i8* null, i64 1) to i64)
+  ; WASM32: icmp ule i64 {{.*}}, 1
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
+  ret i1 %x
+}
+
+; X86-LINUX:   define private void @[[JT]]() #[[ATTR:.*]] align 8 {
+; X86-WIN32:   define private void @[[JT]]() #[[ATTR:.*]] align 8 {
+; ARM:   define private void @[[JT]]() #[[ATTR:.*]] align 4 {
+; THUMB: define private void @[[JT]]() #[[ATTR:.*]] align 4 {
+
+; X86:      jmp ${0:c}@plt
+; X86-SAME: int3
+; X86-SAME: int3
+; X86-SAME: int3
+; X86-SAME: jmp ${1:c}@plt
+; X86-SAME: int3
+; X86-SAME: int3
+; X86-SAME: int3
+
+; ARM:      b $0
+; ARM-SAME: b $1
+
+; THUMB:      b.w $0
+; THUMB-SAME: b.w $1
+
+; NATIVE-SAME: "s,s"(void ()* @f.cfi, void ()* @g.cfi)
+
+; X86-LINUX: attributes #[[ATTR]] = { naked nounwind }
+; X86-WIN32: attributes #[[ATTR]] = { nounwind }
+; ARM: attributes #[[ATTR]] = { naked nounwind
+; THUMB: attributes #[[ATTR]] = { naked nounwind "target-cpu"="cortex-a8" "target-features"="+thumb-mode" }
+
+; WASM32: ![[I0]] = !{i64 1}
+; WASM32: ![[I1]] = !{i64 2}

Added: llvm/trunk/test/Transforms/LowerTypeTests/icall-branch-funnel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/icall-branch-funnel.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/icall-branch-funnel.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/icall-branch-funnel.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux"
+
+; CHECK: @0 = private constant { i32, [0 x i8], i32 } { i32 1, [0 x i8] zeroinitializer, i32 2 }
+; CHECK: @f1 = alias void (), void ()* @.cfi.jumptable
+; CHECK: @f2 = alias void (), bitcast ([8 x i8]* getelementptr inbounds ([2 x [8 x i8]], [2 x [8 x i8]]* bitcast (void ()* @.cfi.jumptable to [2 x [8 x i8]]*), i64 0, i64 1) to void ()*)
+; CHECK: @g1 = alias i32, getelementptr inbounds ({ i32, [0 x i8], i32 }, { i32, [0 x i8], i32 }* @0, i32 0, i32 0)
+; CHECK: @g2 = alias i32, getelementptr inbounds ({ i32, [0 x i8], i32 }, { i32, [0 x i8], i32 }* @0, i32 0, i32 2)
+
+@g1 = constant i32 1
+@g2 = constant i32 2
+
+define void @f1() {
+  ret void
+}
+
+define void @f2() {
+  ret void
+}
+
+declare void @g1f()
+declare void @g2f()
+
+define void @jt2(i8* nest, ...) {
+  musttail call void (...) @llvm.icall.branch.funnel(
+      i8* %0,
+      i32* @g1, void ()* @g1f,
+      i32* @g2, void ()* @g2f,
+      ...
+  )
+  ret void
+}
+
+define void @jt3(i8* nest, ...) {
+  musttail call void (...) @llvm.icall.branch.funnel(
+      i8* %0,
+      void ()* @f1, void ()* @f1,
+      void ()* @f2, void ()* @f2,
+      ...
+  )
+  ret void
+}
+
+declare void @llvm.icall.branch.funnel(...)

Added: llvm/trunk/test/Transforms/LowerTypeTests/import-alias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/import-alias.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/import-alias.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/import-alias.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt -S %s -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import-alias.yaml | FileCheck %s
+;
+; Check that the definitions for @f and @f_alias are removed from this module
+; but @g_alias remains.
+;
+; CHECK: @g_alias = alias void (), void ()* @g
+; CHECK: define hidden void @f.cfi
+; CHECK: declare void @f()
+; CHECK: declare void @f_alias()
+
+target triple = "x86_64-unknown-linux"
+
+@f_alias = alias void (), void ()* @f
+@g_alias = alias void (), void ()* @g
+
+; Definition moved to the merged module
+define void @f() {
+  ret void
+}
+
+; Definition not moved to the merged module
+define void @g() {
+  ret void
+}
+
+define void @uses_aliases() {
+  call void @f_alias()
+  call void @g_alias()
+  ret void
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/import-icall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/import-icall.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/import-icall.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/import-icall.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import-icall.yaml < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i8 @local_a() {
+  call void @external()
+  call void @external_weak()
+  ret i8 1
+}
+
+define internal i8 @local_b() {
+  %x = call i8 @local_a()
+  ret i8 %x
+}
+
+define i8 @use_b() {
+  %x = call i8 @local_b()
+  ret i8 %x
+}
+
+define void @local_decl() {
+  call void @local_decl()
+  ret void
+}
+
+declare void @external()
+declare extern_weak void @external_weak()
+
+; CHECK:      define hidden i8 @local_a.cfi() {
+; CHECK-NEXT:   call void @external()
+; CHECK-NEXT:   call void @external_weak()
+; CHECK-NEXT:   ret i8 1
+; CHECK-NEXT: }
+
+; internal @local_b is not the same function as "local_b" in the summary.
+; CHECK:      define internal i8 @local_b() {
+; CHECK-NEXT:   call i8 @local_a()
+
+; CHECK:      define void @local_decl()
+; CHECK-NEXT:   call void @local_decl()
+
+; CHECK: declare void @external()
+; CHECK: declare extern_weak void @external_weak()
+; CHECK: declare i8 @local_a()
+; CHECK: declare hidden void @external.cfi_jt()
+; CHECK: declare hidden void @external_weak.cfi_jt()

Added: llvm/trunk/test/Transforms/LowerTypeTests/import-unsat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/import-unsat.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/import-unsat.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/import-unsat.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; Test that we correctly import an unsat resolution for type identifier "typeid1".
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import-unsat.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s
+; RUN: FileCheck --check-prefix=SUMMARY %s < %t
+
+; SUMMARY:      GlobalValueMap:
+; SUMMARY-NEXT:   42:
+; SUMMARY-NEXT:    - Linkage:             0
+; SUMMARY-NEXT:      NotEligibleToImport: false
+; SUMMARY-NEXT:      Live:                true
+; SUMMARY-NEXT:      Local:               false
+; SUMMARY-NEXT:      TypeTests: [ 123 ]
+; SUMMARY-NEXT: TypeIdMap:
+; SUMMARY-NEXT:   typeid1:
+; SUMMARY-NEXT:     TTRes:
+; SUMMARY-NEXT:       Kind:            Unsat
+; SUMMARY-NEXT:       SizeM1BitWidth:  0
+
+target datalayout = "e-p:32:32"
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
+  ; CHECK: ret i1 false
+  ret i1 %x
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/import.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/import.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/import.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/import.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,193 @@
+; RUN: opt -mtriple=x86_64-unknown-linux -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import.yaml < %s | FileCheck --check-prefixes=CHECK,X86 %s
+; RUN: opt -mtriple=aarch64-unknown-linux -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import.yaml < %s | FileCheck --check-prefixes=CHECK,ARM %s
+
+target datalayout = "e-p:64:64"
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK-DAG: @__typeid_single_global_addr = external hidden global [0 x i8]
+; CHECK-DAG: @__typeid_inline6_global_addr = external hidden global [0 x i8]
+; X86-DAG: @__typeid_inline6_align = external hidden global [0 x i8], !absolute_symbol !0
+; X86-DAG: @__typeid_inline6_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
+; X86-DAG: @__typeid_inline6_inline_bits = external hidden global [0 x i8], !absolute_symbol !2
+; CHECK-DAG: @__typeid_inline5_global_addr = external hidden global [0 x i8]
+; X86-DAG: @__typeid_inline5_align = external hidden global [0 x i8], !absolute_symbol !0
+; X86-DAG: @__typeid_inline5_size_m1 = external hidden global [0 x i8], !absolute_symbol !3
+; X86-DAG: @__typeid_inline5_inline_bits = external hidden global [0 x i8], !absolute_symbol !4
+; CHECK-DAG: @__typeid_bytearray32_global_addr = external hidden global [0 x i8]
+; X86-DAG: @__typeid_bytearray32_align = external hidden global [0 x i8], !absolute_symbol !0
+; X86-DAG: @__typeid_bytearray32_size_m1 = external hidden global [0 x i8], !absolute_symbol !4
+; CHECK-DAG: @__typeid_bytearray32_byte_array = external hidden global [0 x i8]
+; X86-DAG: @__typeid_bytearray32_bit_mask = external hidden global [0 x i8], !absolute_symbol !0
+; CHECK-DAG: @__typeid_bytearray7_global_addr = external hidden global [0 x i8]
+; X86-DAG: @__typeid_bytearray7_align = external hidden global [0 x i8], !absolute_symbol !0
+; X86-DAG: @__typeid_bytearray7_size_m1 = external hidden global [0 x i8], !absolute_symbol !5
+; CHECK-DAG: @__typeid_bytearray7_byte_array = external hidden global [0 x i8]
+; X86-DAG: @__typeid_bytearray7_bit_mask = external hidden global [0 x i8], !absolute_symbol !0
+; CHECK-DAG: @__typeid_allones32_global_addr = external hidden global [0 x i8]
+; X86-DAG: @__typeid_allones32_align = external hidden global [0 x i8], !absolute_symbol !0
+; X86-DAG: @__typeid_allones32_size_m1 = external hidden global [0 x i8], !absolute_symbol !4
+; CHECK-DAG: @__typeid_allones7_global_addr = external hidden global [0 x i8]
+; X86-DAG: @__typeid_allones7_align = external hidden global [0 x i8], !absolute_symbol !0
+; X86-DAG: @__typeid_allones7_size_m1 = external hidden global [0 x i8], !absolute_symbol !5
+
+; CHECK: define i1 @allones7(i8* [[p:%.*]])
+define i1 @allones7(i8* %p) {
+  ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+  ; CHECK-NEXT: [[sub:%.*]] = sub i64 [[pi]], ptrtoint ([0 x i8]* @__typeid_allones7_global_addr to i64)
+  ; X86-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], zext (i8 ptrtoint ([0 x i8]* @__typeid_allones7_align to i8) to i64)
+  ; X86-NEXT: [[shl:%.*]] = shl i64 [[sub]], zext (i8 sub (i8 64, i8 ptrtoint ([0 x i8]* @__typeid_allones7_align to i8)) to i64)
+  ; ARM-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], 1
+  ; ARM-NEXT: [[shl:%.*]] = shl i64 [[sub]], 63
+  ; CHECK-NEXT: [[or:%.*]] = or i64 [[lshr]], [[shl]]
+  ; X86-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], ptrtoint ([0 x i8]* @__typeid_allones7_size_m1 to i64)
+  ; ARM-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], 42
+  ; CHECK-NEXT: ret i1 [[ule]]
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"allones7")
+  ret i1 %x
+}
+
+; CHECK: define i1 @allones32(i8* [[p:%.*]])
+define i1 @allones32(i8* %p) {
+  ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+  ; CHECK-NEXT: [[sub:%.*]] = sub i64 [[pi]], ptrtoint ([0 x i8]* @__typeid_allones32_global_addr to i64)
+  ; X86-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], zext (i8 ptrtoint ([0 x i8]* @__typeid_allones32_align to i8) to i64)
+  ; X86-NEXT: [[shl:%.*]] = shl i64 [[sub]], zext (i8 sub (i8 64, i8 ptrtoint ([0 x i8]* @__typeid_allones32_align to i8)) to i64)
+  ; ARM-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], 2
+  ; ARM-NEXT: [[shl:%.*]] = shl i64 [[sub]], 62
+  ; CHECK-NEXT: [[or:%.*]] = or i64 [[lshr]], [[shl]]
+  ; X86-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], ptrtoint ([0 x i8]* @__typeid_allones32_size_m1 to i64)
+  ; ARM-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], 12345
+  ; CHECK-NEXT: ret i1 [[ule]]
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"allones32")
+  ret i1 %x
+}
+
+; CHECK: define i1 @bytearray7(i8* [[p:%.*]])
+define i1 @bytearray7(i8* %p) {
+  ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+  ; CHECK-NEXT: [[sub:%.*]] = sub i64 [[pi]], ptrtoint ([0 x i8]* @__typeid_bytearray7_global_addr to i64)
+  ; X86-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], zext (i8 ptrtoint ([0 x i8]* @__typeid_bytearray7_align to i8) to i64)
+  ; X86-NEXT: [[shl:%.*]] = shl i64 [[sub]], zext (i8 sub (i8 64, i8 ptrtoint ([0 x i8]* @__typeid_bytearray7_align to i8)) to i64)
+  ; ARM-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], 3
+  ; ARM-NEXT: [[shl:%.*]] = shl i64 [[sub]], 61
+  ; CHECK-NEXT: [[or:%.*]] = or i64 [[lshr]], [[shl]]
+  ; X86-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], ptrtoint ([0 x i8]* @__typeid_bytearray7_size_m1 to i64)
+  ; ARM-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], 43
+  ; CHECK-NEXT: br i1 [[ule]], label %[[t:.*]], label %[[f:.*]]
+
+  ; CHECK: [[t]]:
+  ; CHECK-NEXT: [[gep:%.*]] = getelementptr i8, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @__typeid_bytearray7_byte_array, i32 0, i32 0), i64 [[or]]
+  ; CHECK-NEXT: [[load:%.*]] = load i8, i8* [[gep]]
+  ; X86-NEXT: [[and:%.*]] = and i8 [[load]], ptrtoint ([0 x i8]* @__typeid_bytearray7_bit_mask to i8)
+  ; ARM-NEXT: [[and:%.*]] = and i8 [[load]], ptrtoint (i8* inttoptr (i64 64 to i8*) to i8)
+  ; CHECK-NEXT: [[ne:%.*]] = icmp ne i8 [[and]], 0
+  ; CHECK-NEXT: br label %[[f]]
+
+  ; CHECK: [[f]]:
+  ; CHECK-NEXT: [[phi:%.*]] = phi i1 [ false, %0 ], [ [[ne]], %[[t]] ]
+  ; CHECK-NEXT: ret i1 [[phi]]
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"bytearray7")
+  ret i1 %x
+}
+
+; CHECK: define i1 @bytearray32(i8* [[p:%.*]])
+define i1 @bytearray32(i8* %p) {
+  ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+  ; CHECK-NEXT: [[sub:%.*]] = sub i64 [[pi]], ptrtoint ([0 x i8]* @__typeid_bytearray32_global_addr to i64)
+  ; X86-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], zext (i8 ptrtoint ([0 x i8]* @__typeid_bytearray32_align to i8) to i64)
+  ; X86-NEXT: [[shl:%.*]] = shl i64 [[sub]], zext (i8 sub (i8 64, i8 ptrtoint ([0 x i8]* @__typeid_bytearray32_align to i8)) to i64)
+  ; ARM-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], 4
+  ; ARM-NEXT: [[shl:%.*]] = shl i64 [[sub]], 60
+  ; CHECK-NEXT: [[or:%.*]] = or i64 [[lshr]], [[shl]]
+  ; X86-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], ptrtoint ([0 x i8]* @__typeid_bytearray32_size_m1 to i64)
+  ; ARM-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], 12346
+  ; CHECK-NEXT: br i1 [[ule]], label %[[t:.*]], label %[[f:.*]]
+
+  ; CHECK: [[t]]:
+  ; CHECK-NEXT: [[gep:%.*]] = getelementptr i8, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @__typeid_bytearray32_byte_array, i32 0, i32 0), i64 [[or]]
+  ; CHECK-NEXT: [[load:%.*]] = load i8, i8* [[gep]]
+  ; X86-NEXT: [[and:%.*]] = and i8 [[load]], ptrtoint ([0 x i8]* @__typeid_bytearray32_bit_mask to i8)
+  ; ARM-NEXT: [[and:%.*]] = and i8 [[load]], ptrtoint (i8* inttoptr (i64 128 to i8*) to i8)
+  ; CHECK-NEXT: [[ne:%.*]] = icmp ne i8 [[and]], 0
+  ; CHECK-NEXT: br label %[[f]]
+
+  ; CHECK: [[f]]:
+  ; CHECK-NEXT: [[phi:%.*]] = phi i1 [ false, %0 ], [ [[ne]], %[[t]] ]
+  ; CHECK-NEXT: ret i1 [[phi]]
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"bytearray32")
+  ret i1 %x
+}
+
+; CHECK: define i1 @inline5(i8* [[p:%.*]])
+define i1 @inline5(i8* %p) {
+  ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+  ; CHECK-NEXT: [[sub:%.*]] = sub i64 [[pi]], ptrtoint ([0 x i8]* @__typeid_inline5_global_addr to i64)
+  ; X86-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], zext (i8 ptrtoint ([0 x i8]* @__typeid_inline5_align to i8) to i64)
+  ; X86-NEXT: [[shl:%.*]] = shl i64 [[sub]], zext (i8 sub (i8 64, i8 ptrtoint ([0 x i8]* @__typeid_inline5_align to i8)) to i64)
+  ; ARM-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], 5
+  ; ARM-NEXT: [[shl:%.*]] = shl i64 [[sub]], 59
+  ; CHECK-NEXT: [[or:%.*]] = or i64 [[lshr]], [[shl]]
+  ; X86-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], ptrtoint ([0 x i8]* @__typeid_inline5_size_m1 to i64)
+  ; ARM-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], 31
+  ; CHECK-NEXT: br i1 [[ule]], label %[[t:.*]], label %[[f:.*]]
+
+  ; CHECK: [[t]]:
+  ; CHECK-NEXT: [[trunc:%.*]] = trunc i64 [[or]] to i32
+  ; CHECK-NEXT: [[and:%.*]] = and i32 [[trunc]], 31
+  ; CHECK-NEXT: [[shl2:%.*]] = shl i32 1, [[and]]
+  ; X86-NEXT: [[and2:%.*]] = and i32 ptrtoint ([0 x i8]* @__typeid_inline5_inline_bits to i32), [[shl2]]
+  ; ARM-NEXT: [[and2:%.*]] = and i32 123, [[shl2]]
+  ; CHECK-NEXT: [[ne:%.*]] = icmp ne i32 [[and2]], 0
+  ; CHECK-NEXT: br label %[[f]]
+
+  ; CHECK: [[f]]:
+  ; CHECK-NEXT: [[phi:%.*]] = phi i1 [ false, %0 ], [ [[ne]], %[[t]] ]
+  ; CHECK-NEXT: ret i1 [[phi]]
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"inline5")
+  ret i1 %x
+}
+
+; CHECK: define i1 @inline6(i8* [[p:%.*]])
+define i1 @inline6(i8* %p) {
+  ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+  ; CHECK-NEXT: [[sub:%.*]] = sub i64 [[pi]], ptrtoint ([0 x i8]* @__typeid_inline6_global_addr to i64)
+  ; X86-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], zext (i8 ptrtoint ([0 x i8]* @__typeid_inline6_align to i8) to i64)
+  ; X86-NEXT: [[shl:%.*]] = shl i64 [[sub]], zext (i8 sub (i8 64, i8 ptrtoint ([0 x i8]* @__typeid_inline6_align to i8)) to i64)
+  ; ARM-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], 6
+  ; ARM-NEXT: [[shl:%.*]] = shl i64 [[sub]], 58
+  ; CHECK-NEXT: [[or:%.*]] = or i64 [[lshr]], [[shl]]
+  ; X86-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], ptrtoint ([0 x i8]* @__typeid_inline6_size_m1 to i64)
+  ; ARM-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], 63
+  ; CHECK-NEXT: br i1 [[ule]], label %[[t:.*]], label %[[f:.*]]
+
+  ; CHECK: [[t]]:
+  ; CHECK-NEXT: [[and:%.*]] = and i64 [[or]], 63
+  ; CHECK-NEXT: [[shl2:%.*]] = shl i64 1, [[and]]
+  ; X86-NEXT: [[and2:%.*]] = and i64 ptrtoint ([0 x i8]* @__typeid_inline6_inline_bits to i64), [[shl2]]
+  ; ARM-NEXT: [[and2:%.*]] = and i64 1000000000000, [[shl2]]
+  ; CHECK-NEXT: [[ne:%.*]] = icmp ne i64 [[and2]], 0
+  ; CHECK-NEXT: br label %[[f]]
+
+  ; CHECK: [[f]]:
+  ; CHECK-NEXT: [[phi:%.*]] = phi i1 [ false, %0 ], [ [[ne]], %[[t]] ]
+  ; CHECK-NEXT: ret i1 [[phi]]
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"inline6")
+  ret i1 %x
+}
+
+; CHECK: define i1 @single(i8* [[p:%.*]])
+define i1 @single(i8* %p) {
+  ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+  ; CHECK-NEXT: [[eq:%.*]] = icmp eq i64 [[pi]], ptrtoint ([0 x i8]* @__typeid_single_global_addr to i64)
+  ; CHECK-NEXT: ret i1 [[eq]]
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"single")
+  ret i1 %x
+}
+
+; X86: !0 = !{i64 0, i64 256}
+; X86: !1 = !{i64 0, i64 64}
+; X86: !2 = !{i64 -1, i64 -1}
+; X86: !3 = !{i64 0, i64 32}
+; X86: !4 = !{i64 0, i64 4294967296}
+; X86: !5 = !{i64 0, i64 128}

Added: llvm/trunk/test/Transforms/LowerTypeTests/layout.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/layout.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/layout.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/layout.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+; Tests that this set of globals is laid out according to our layout algorithm
+; (see GlobalLayoutBuilder in include/llvm/Transforms/IPO/LowerTypeTests.h).
+; The chosen layout in this case is a, e, b, d, c.
+
+; CHECK: private constant { i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32 } { i32 1, [0 x i8] zeroinitializer, i32 5, [0 x i8] zeroinitializer, i32 2, [0 x i8] zeroinitializer, i32 4, [0 x i8] zeroinitializer, i32 3 }
+@a = constant i32 1, !type !0, !type !2
+@b = constant i32 2, !type !0, !type !1
+@c = constant i32 3, !type !0
+@d = constant i32 4, !type !1
+@e = constant i32 5, !type !2
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 0, !"typeid2"}
+!2 = !{i32 0, !"typeid3"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define void @foo() {
+  %x = call i1 @llvm.type.test(i8* undef, metadata !"typeid1")
+  %y = call i1 @llvm.type.test(i8* undef, metadata !"typeid2")
+  %z = call i1 @llvm.type.test(i8* undef, metadata !"typeid3")
+  ret void
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/nonstring.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/nonstring.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/nonstring.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/nonstring.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+; Tests that non-string metadata nodes may be used as bitset identifiers.
+
+target datalayout = "e-p:32:32"
+
+; CHECK: @[[ANAME:.*]] = private constant { i32 }
+; CHECK: @[[BNAME:.*]] = private constant { [2 x i32] }
+
+@a = constant i32 1, !type !0
+@b = constant [2 x i32] [i32 2, i32 3], !type !1
+
+!0 = !{i32 0, !2}
+!1 = !{i32 0, !3}
+!2 = distinct !{}
+!3 = distinct !{}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK-LABEL: @foo
+define i1 @foo(i8* %p) {
+  ; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ i32 }* @[[ANAME]] to i32)
+  %x = call i1 @llvm.type.test(i8* %p, metadata !2)
+  ret i1 %x
+}
+
+; CHECK-LABEL: @bar
+define i1 @bar(i8* %p) {
+  ; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ [2 x i32] }* @[[BNAME]] to i32)
+  %x = call i1 @llvm.type.test(i8* %p, metadata !3)
+  ret i1 %x
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/pr25902.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/pr25902.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/pr25902.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/pr25902.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,19 @@
+; PR25902: gold plugin crash.
+; RUN: opt -mtriple=i686-pc -S -lowertypetests < %s
+
+define void @f(void ()* %p) {
+entry:
+  %a = bitcast void ()* %p to i8*, !nosanitize !1
+  %b = call i1 @llvm.type.test(i8* %a, metadata !"_ZTSFvvE"), !nosanitize !1
+  ret void
+}
+
+define void @g() !type !0 {
+entry:
+  ret void
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+
+!0 = !{i64 0, !"_ZTSFvvE"}
+!1 = !{}

Added: llvm/trunk/test/Transforms/LowerTypeTests/pr37625.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/pr37625.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/pr37625.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/pr37625.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,14 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/exported-funcs.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare !type !2 extern_weak void @external_addrtaken(i8)
+
+!cfi.functions = !{!0, !1}
+
+!0 = !{!"external_addrtaken", i8 2, !2}
+!1 = !{!"external_addrtaken", i8 0, !2}
+!2 = !{i64 0, !"typeid1"}
+
+; CHECK-DAG: @external_addrtaken = alias void (i8), bitcast

Added: llvm/trunk/test/Transforms/LowerTypeTests/section.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/section.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/section.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/section.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; Test that functions with "section" attribute are accepted, and jumptables are
+; emitted in ".text".
+
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: @f = alias void (), void ()* @[[JT:.*]]
+; CHECK: define hidden void @f.cfi() section "xxx"
+
+define void @f() section "xxx" !type !0 {
+entry:
+  ret void
+}
+
+define i1 @g() {
+entry:
+  %0 = call i1 @llvm.type.test(i8* bitcast (void ()* @f to i8*), metadata !"_ZTSFvE")
+  ret i1 %0
+}
+
+; CHECK: define private void @[[JT]]() #{{.*}} align {{.*}} {
+
+declare i1 @llvm.type.test(i8*, metadata) nounwind readnone
+
+!0 = !{i64 0, !"_ZTSFvE"}

Added: llvm/trunk/test/Transforms/LowerTypeTests/simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/simple.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/simple.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/simple.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,113 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+; RUN: opt -S -lowertypetests -mtriple=x86_64-apple-macosx10.8.0 < %s | FileCheck %s
+; RUN: opt -S -O3 < %s | FileCheck -check-prefix=CHECK-NODISCARD %s
+
+target datalayout = "e-p:32:32"
+
+; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }
+@a = constant i32 1, !type !0, !type !2
+@b = hidden constant [63 x i32] zeroinitializer, !type !0, !type !1
+@c = protected constant i32 3, !type !1, !type !2
+@d = constant [2 x i32] [i32 4, i32 5], !type !3
+
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+
+; CHECK: [[BA:@[^ ]*]] = private constant [68 x i8] c"\03\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\00\01"
+
+; Offset 0, 4 byte alignment
+!0 = !{i32 0, !"typeid1"}
+!3 = !{i32 4, !"typeid1"}
+
+; Offset 4, 256 byte alignment
+!1 = !{i32 0, !"typeid2"}
+
+; Offset 0, 4 byte alignment
+!2 = !{i32 0, !"typeid3"}
+
+; CHECK: @bits_use{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
+; CHECK: @bits_use.{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
+; CHECK: @bits_use.{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
+
+; CHECK: @a = alias i32, getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
+; CHECK: @b = hidden alias [63 x i32], getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
+; CHECK: @c = protected alias i32, getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
+; CHECK: @d = alias [2 x i32], getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
+
+; CHECK: @bits{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
+; CHECK: @bits.{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: @foo(i32* [[A0:%[^ ]*]])
+define i1 @foo(i32* %p) {
+  ; CHECK-NOT: llvm.type.test
+
+  ; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8*
+  %pi8 = bitcast i32* %p to i8*
+  ; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32
+  ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
+  ; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2
+  ; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30
+  ; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]]
+  ; CHECK: [[R6:%[^ ]*]] = icmp ule i32 [[R5]], 67
+  ; CHECK: br i1 [[R6]]
+
+  ; CHECK: [[R8:%[^ ]*]] = getelementptr i8, i8* @bits_use.{{[0-9]*}}, i32 [[R5]]
+  ; CHECK: [[R9:%[^ ]*]] = load i8, i8* [[R8]]
+  ; CHECK: [[R10:%[^ ]*]] = and i8 [[R9]], 1
+  ; CHECK: [[R11:%[^ ]*]] = icmp ne i8 [[R10]], 0
+
+  ; CHECK: [[R16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[R11]], {{%[^ ]*}} ]
+  %x = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid1")
+
+  ; CHECK-NOT: llvm.type.test
+  %y = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid1")
+
+  ; CHECK: ret i1 [[R16]]
+  ret i1 %x
+}
+
+; CHECK: @bar(i32* [[B0:%[^ ]*]])
+define i1 @bar(i32* %p) {
+  ; CHECK: [[S0:%[^ ]*]] = bitcast i32* [[B0]] to i8*
+  %pi8 = bitcast i32* %p to i8*
+  ; CHECK: [[S1:%[^ ]*]] = ptrtoint i8* [[S0]] to i32
+  ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], ptrtoint (i8* getelementptr (i8, i8* bitcast ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i8*), i32 4) to i32)
+  ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 8
+  ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 24
+  ; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]]
+  ; CHECK: [[S6:%[^ ]*]] = icmp ule i32 [[S5]], 1
+  %x = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid2")
+
+  ; CHECK: ret i1 [[S6]]
+  ret i1 %x
+}
+
+; CHECK: @baz(i32* [[C0:%[^ ]*]])
+define i1 @baz(i32* %p) {
+  ; CHECK: [[T0:%[^ ]*]] = bitcast i32* [[C0]] to i8*
+  %pi8 = bitcast i32* %p to i8*
+  ; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32
+  ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
+  ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 2
+  ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 30
+  ; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]]
+  ; CHECK: [[T6:%[^ ]*]] = icmp ule i32 [[T5]], 65
+  ; CHECK: br i1 [[T6]]
+
+  ; CHECK: [[T8:%[^ ]*]] = getelementptr i8, i8* @bits_use{{(\.[0-9]*)?}}, i32 [[T5]]
+  ; CHECK: [[T9:%[^ ]*]] = load i8, i8* [[T8]]
+  ; CHECK: [[T10:%[^ ]*]] = and i8 [[T9]], 2
+  ; CHECK: [[T11:%[^ ]*]] = icmp ne i8 [[T10]], 0
+
+  ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T11]], {{%[^ ]*}} ]
+  %x = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid3")
+  ; CHECK: ret i1 [[T16]]
+  ret i1 %x
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/simplify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/simplify.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/simplify.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/simplify.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import.yaml < %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux"
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: define i1 @bytearray7(i8* [[p:%.*]])
+define i1 @bytearray7(i8* %p) {
+  ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+  ; CHECK-NEXT: [[sub:%.*]] = sub i64 [[pi]], ptrtoint ([0 x i8]* @__typeid_bytearray7_global_addr to i64)
+  ; CHECK-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], zext (i8 ptrtoint ([0 x i8]* @__typeid_bytearray7_align to i8) to i64)
+  ; CHECK-NEXT: [[shl:%.*]] = shl i64 [[sub]], zext (i8 sub (i8 64, i8 ptrtoint ([0 x i8]* @__typeid_bytearray7_align to i8)) to i64)
+  ; CHECK-NEXT: [[or:%.*]] = or i64 [[lshr]], [[shl]]
+  ; CHECK-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], ptrtoint ([0 x i8]* @__typeid_bytearray7_size_m1 to i64)
+  ; CHECK-NEXT: br i1 [[ule]], label %[[t1:.*]], label %[[f:.*]]
+
+  ; CHECK: [[t1]]:
+  ; CHECK-NEXT: [[gep:%.*]] = getelementptr i8, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @__typeid_bytearray7_byte_array, i32 0, i32 0), i64 [[or]]
+  ; CHECK-NEXT: [[load:%.*]] = load i8, i8* [[gep]]
+  ; CHECK-NEXT: [[and:%.*]] = and i8 [[load]], ptrtoint ([0 x i8]* @__typeid_bytearray7_bit_mask to i8)
+  ; CHECK-NEXT: [[ne:%.*]] = icmp ne i8 [[and]], 0
+  ; CHECK-NEXT: br i1 [[ne]], label %[[t:.*]], label %[[f:.*]]
+
+  ; CHECK: [[t]]:
+  ; CHECK-NEXT: ret i1 true
+
+  ; CHECK: [[f]]:
+  ; CHECK-NEXT: ret i1 false
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"bytearray7")
+  br i1 %x, label %t, label %f
+
+t:
+  ret i1 true
+
+f:
+  ret i1 false
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/simplify_phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/simplify_phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/simplify_phi.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/simplify_phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; Ensure that LowerTypeTests control flow simplification correctly handle phi nodes.
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import.yaml < %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: define i1 @bytearray7(i8* [[p:%.*]])
+define i1 @bytearray7(i8* %p) {
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"bytearray7")
+  br i1 %x, label %t, label %f
+
+t:
+  br label %f
+
+f:
+  ; CHECK: %test = phi i1 [ false, %{{[0-9]+}} ], [ true, %t ], [ false, %0 ]
+  %test = phi i1 [ false, %0 ], [ true, %t ]
+  ret i1 %test
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/single-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/single-offset.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/single-offset.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/single-offset.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], i32 }
+@a = constant i32 1, !type !0, !type !1
+@b = constant i32 2, !type !0, !type !2
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 0, !"typeid2"}
+!2 = !{i32 0, !"typeid3"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: @foo(i8* [[A0:%[^ ]*]])
+define i1 @foo(i8* %p) {
+  ; CHECK: [[R0:%[^ ]*]] = ptrtoint i8* [[A0]] to i32
+  ; CHECK: [[R1:%[^ ]*]] = icmp eq i32 [[R0]], ptrtoint ({ i32, [0 x i8], i32 }* [[G]] to i32)
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid2")
+  ; CHECK: ret i1 [[R1]]
+  ret i1 %x
+}
+
+; CHECK: @bar(i8* [[B0:%[^ ]*]])
+define i1 @bar(i8* %p) {
+  ; CHECK: [[S0:%[^ ]*]] = ptrtoint i8* [[B0]] to i32
+  ; CHECK: [[S1:%[^ ]*]] = icmp eq i32 [[S0]],  ptrtoint (i8* getelementptr (i8, i8* bitcast ({ i32, [0 x i8], i32 }* [[G]] to i8*), i32 4) to i32)
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid3")
+  ; CHECK: ret i1 [[S1]]
+  ret i1 %x
+}
+
+; CHECK: @x(
+define i1 @x(i8* %p) {
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
+  ret i1 %x
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/unnamed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/unnamed.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/unnamed.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/unnamed.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+; CHECK: @{{[0-9]+}} = alias
+; CHECK: @{{[0-9]+}} = alias
+@0 = constant i32 1, !type !0
+@1 = constant [2 x i32] [i32 2, i32 3], !type !1
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 4, !"typeid1"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
+  ret i1 %x
+}

Added: llvm/trunk/test/Transforms/LowerTypeTests/unsat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerTypeTests/unsat.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerTypeTests/unsat.ll (added)
+++ llvm/trunk/test/Transforms/LowerTypeTests/unsat.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
+  ; CHECK: ret i1 false
+  ret i1 %x
+}

Added: llvm/trunk/test/Transforms/LowerWidenableCondition/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LowerWidenableCondition/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LowerWidenableCondition/basic.ll (added)
+++ llvm/trunk/test/Transforms/LowerWidenableCondition/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -lower-widenable-condition < %s        | FileCheck %s
+; RUN: opt -S -passes=lower-widenable-condition < %s | FileCheck %s
+
+; Basic test case: make sure that all widenable conditions turn into i1 true.
+define void @f_0(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], true
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]]
+; CHECK:       deopt:
+; CHECK-NEXT:    unreachable
+; CHECK:       guarded:
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[COND_1:%.*]], true
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND4]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]]
+; CHECK:       deopt2:
+; CHECK-NEXT:    unreachable
+; CHECK:       guarded1:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+  %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+  br i1 %exiplicit_guard_cond, label %guarded, label %deopt
+
+deopt:                                            ; preds = %entry
+  unreachable
+
+guarded:                                          ; preds = %entry
+  %widenable_cond3 = call i1 @llvm.experimental.widenable.condition()
+  %exiplicit_guard_cond4 = and i1 %cond_1, %widenable_cond3
+  br i1 %exiplicit_guard_cond4, label %guarded1, label %deopt2
+
+deopt2:                                           ; preds = %guarded
+  unreachable
+
+guarded1:                                         ; preds = %guarded
+  ret void
+}
+
+; Function Attrs: inaccessiblememonly nounwind
+declare i1 @llvm.experimental.widenable.condition() #0
+
+attributes #0 = { inaccessiblememonly nounwind }

Added: llvm/trunk/test/Transforms/MakeGuardsExplicit/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MakeGuardsExplicit/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MakeGuardsExplicit/basic.ll (added)
+++ llvm/trunk/test/Transforms/MakeGuardsExplicit/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -make-guards-explicit < %s        | FileCheck %s
+; RUN: opt -S -passes=make-guards-explicit < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1,...)
+
+; Check that a sole guard can be turned into explicit guards form.
+define void @trivial_guard(i1 %cond) {
+; CHECK-LABEL: @trivial_guard(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 123, i64 456) ]
+; CHECK-NEXT:    ret void
+; CHECK:       guarded:
+; CHECK-NEXT:    ret void
+;
+entry:                                            ; the single guard below should expand into: widenable condition, 'and', conditional branch
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"(i32 123, i64 456) ]
+  ret void                                        ; expected to end up in the new 'guarded' block
+}
+
+; Check that a sequence of guards can be turned into explicit guards form.
+define void @trivial_guard_sequence(i1 %cond1, i1 %cond2, i1 %cond3) {
+; CHECK-LABEL: @trivial_guard_sequence(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND1:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 123, i64 456) ]
+; CHECK-NEXT:    ret void
+; CHECK:       guarded:
+; CHECK-NEXT:    [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[COND2:%.*]], [[WIDENABLE_COND3]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND4]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof !0
+; CHECK:       deopt2:
+; CHECK-NEXT:    call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 789, i64 123) ]
+; CHECK-NEXT:    ret void
+; CHECK:       guarded1:
+; CHECK-NEXT:    [[WIDENABLE_COND7:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[COND3:%.*]], [[WIDENABLE_COND7]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND8]], label [[GUARDED5:%.*]], label [[DEOPT6:%.*]], !prof !0
+; CHECK:       deopt6:
+; CHECK-NEXT:    call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 456, i64 789) ]
+; CHECK-NEXT:    ret void
+; CHECK:       guarded5:
+; CHECK-NEXT:    ret void
+;
+entry:                                            ; three back-to-back guards; each should get its own widenable condition and its own deopt block
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"(i32 123, i64 456) ]
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"(i32 789, i64 123) ]
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond3) [ "deopt"(i32 456, i64 789) ]
+  ret void                                        ; expected to land in the block after the last guard
+}
+
+; Check that all instructions between the guards are preserved.
+define void @split_block_contents(i1 %cond1, i1 %cond2, i1 %cond3, i32* %p) {
+; CHECK-LABEL: @split_block_contents(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND1:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 123, i64 456) ]
+; CHECK-NEXT:    ret void
+; CHECK:       guarded:
+; CHECK-NEXT:    store i32 1, i32* [[P]]
+; CHECK-NEXT:    [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[COND2:%.*]], [[WIDENABLE_COND3]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND4]], label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof !0
+; CHECK:       deopt2:
+; CHECK-NEXT:    call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 789, i64 123) ]
+; CHECK-NEXT:    ret void
+; CHECK:       guarded1:
+; CHECK-NEXT:    store i32 2, i32* [[P]]
+; CHECK-NEXT:    [[WIDENABLE_COND7:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[COND3:%.*]], [[WIDENABLE_COND7]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND8]], label [[GUARDED5:%.*]], label [[DEOPT6:%.*]], !prof !0
+; CHECK:       deopt6:
+; CHECK-NEXT:    call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 456, i64 789) ]
+; CHECK-NEXT:    ret void
+; CHECK:       guarded5:
+; CHECK-NEXT:    store i32 3, i32* [[P]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i32 0, i32* %p   ; expected to stay in entry, ahead of the first guard's branch
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"(i32 123, i64 456) ]
+  store i32 1, i32* %p   ; expected at the top of the block following the first guard
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"(i32 789, i64 123) ]
+  store i32 2, i32* %p   ; expected at the top of the block following the second guard
+  call void(i1, ...) @llvm.experimental.guard(i1 %cond3) [ "deopt"(i32 456, i64 789) ]
+  store i32 3, i32* %p   ; expected in the final block, right before the return
+  ret void
+}
+
+; Check that the guard can split a loop properly.
+define void @split_loop(i1 %cond, i32 %N, i32 %M) {
+; CHECK-LABEL: @split_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ]
+; CHECK-NEXT:    [[GUARD_COND:%.*]] = icmp slt i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT:    [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[GUARD_COND]], [[WIDENABLE_COND]]
+; CHECK-NEXT:    br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 123, i64 456) ]
+; CHECK-NEXT:    ret void
+; CHECK:       guarded:
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp slt i32 [[IV]], [[M:%.*]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:                                             ; single-block loop; the guard in the middle should split it in two
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]   ; after the split the backedge value is expected to arrive from the new guarded block
+  %guard_cond = icmp slt i32 %iv, %N
+  call void(i1, ...) @llvm.experimental.guard(i1 %guard_cond) [ "deopt"(i32 123, i64 456) ]
+  %loop_cond = icmp slt i32 %iv, %M                  ; everything from here down is expected to move into the guarded block
+  %iv.next = add i32 %iv, 1
+  br i1 %loop_cond, label %loop, label %exit
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+; Uninitialized values are not handled correctly.
+;
+; RUN: opt < %s -mem2reg -disable-output
+;
+
+define i32 @test() {
+        ; To be promoted: %X is loaded below without any store ever writing to it.
+	%X = alloca i32		; <i32*> [#uses=1]
+	%Y = load i32, i32* %X		; <i32> [#uses=1]
+	ret i32 %Y
+}

Added: llvm/trunk/test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,12 @@
+; This input caused the mem2reg pass to die because it was trying to promote
+; the %r alloca, even though it is invalid to do so in this case!
+;
+; RUN: opt < %s -mem2reg
+
+define void @test() {
+	%r = alloca i32		; <i32*> [#uses=2]
+	store i32 4, i32* %r
+	store i32* %r, i32** null		; here %r is the value being stored (its address is written out), not the destination
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,10 @@
+; RUN: opt < %s -mem2reg
+
+define void @_Z3barv() {
+	%result = alloca i32		; <i32*> [#uses=1]
+	ret void
+		; No predecessors! Everything after the ret above forms a separate, unreachable block.
+	store i32 0, i32* %result
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; This testcase makes sure that mem2reg can handle unreachable blocks.
+; RUN: opt < %s -mem2reg
+
+define i32 @test() {
+	%X = alloca i32		; <i32*> [#uses=2]
+	store i32 6, i32* %X
+	br label %Loop
+Loop:		; preds = %EndOfLoop, %0
+	store i32 5, i32* %X
+	br label %EndOfLoop
+Unreachable:		; No predecessors! It still branches into the live loop below.
+	br label %EndOfLoop
+EndOfLoop:		; preds = %Unreachable, %Loop
+	br label %Loop
+}
+

Added: llvm/trunk/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; Mem2reg used to only add one incoming value to a PHI node, even if it had
+; multiple incoming edges from a block.
+;
+; RUN: opt < %s -mem2reg -disable-output
+
+define i32 @test(i1 %c1, i1 %c2) {
+	%X = alloca i32		; <i32*> [#uses=2]
+	br i1 %c1, label %Exit, label %B2
+B2:		; preds = %0
+	store i32 2, i32* %X
+	br i1 %c2, label %Exit, label %Exit		; both successors are %Exit, so %Exit has two incoming edges from %B2
+Exit:		; preds = %B2, %B2, %0
+	%Y = load i32, i32* %X		; <i32> [#uses=1]
+	ret i32 %Y
+}
+

Added: llvm/trunk/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; Promoting some values allows promotion of other values.
+; RUN: opt < %s -mem2reg -S | not grep alloca
+
+define i32 @test2() {
+	%result = alloca i32		; <i32*> [#uses=2]
+	%a = alloca i32		; <i32*> [#uses=2]
+	%p = alloca i32*		; <i32**> [#uses=2]
+	store i32 0, i32* %a
+	store i32* %a, i32** %p		; %a's address is stored into %p and read back as %tmp.0 below
+	%tmp.0 = load i32*, i32** %p		; <i32*> [#uses=1]
+	%tmp.1 = load i32, i32* %tmp.0		; <i32> [#uses=1] -- reaches %a only through the pointer loaded from %p
+	store i32 %tmp.1, i32* %result
+	%tmp.2 = load i32, i32* %result		; <i32> [#uses=1]
+	ret i32 %tmp.2
+}
+

Added: llvm/trunk/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; Mem2reg should not insert dead PHI nodes!  The naive algorithm inserts a PHI
+;  node in L3, even though there is no load of %A in anything dominated by L3.
+
+; RUN: opt < %s -mem2reg -S | not grep phi
+
+define void @test(i32 %B, i1 %C) {
+	%A = alloca i32		; <i32*> [#uses=4]
+	store i32 %B, i32* %A
+	br i1 %C, label %L1, label %L2
+L1:		; preds = %0
+	store i32 %B, i32* %A
+	%D = load i32, i32* %A		; <i32> [#uses=1]
+	call void @test( i32 %D, i1 false )
+	br label %L3
+L2:		; preds = %0
+	%E = load i32, i32* %A		; <i32> [#uses=1]
+	call void @test( i32 %E, i1 true )
+	br label %L3
+L3:		; preds = %L2, %L1 -- join point with no load of %A, so no phi should be created here
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt < %s -mem2reg -instcombine -S | grep store
+; PR590
+
+
+define void @zero(i8* %p, i32 %n) {
+entry:
+	%p_addr = alloca i8*		; <i8**> [#uses=2]
+	%n_addr = alloca i32		; <i32*> [#uses=2]
+	%i = alloca i32		; <i32*> [#uses=6]
+	%out = alloca i32		; <i32*> [#uses=2]
+	%undef = alloca i32		; <i32*> [#uses=2] -- never initialized before the loop
+	store i8* %p, i8** %p_addr
+	store i32 %n, i32* %n_addr
+	store i32 0, i32* %i
+	br label %loopentry
+loopentry:		; preds = %endif, %entry
+	%tmp.0 = load i32, i32* %n_addr		; <i32> [#uses=1]
+	%tmp.1 = add i32 %tmp.0, 1		; <i32> [#uses=1]
+	%tmp.2 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp.3 = icmp sgt i32 %tmp.1, %tmp.2		; <i1> [#uses=2]
+	%tmp.4 = zext i1 %tmp.3 to i32		; <i32> [#uses=0]
+	br i1 %tmp.3, label %no_exit, label %return
+no_exit:		; preds = %loopentry
+	%tmp.5 = load i32, i32* %undef		; <i32> [#uses=1] -- read comes before the write two lines below
+	store i32 %tmp.5, i32* %out
+	store i32 0, i32* %undef
+	%tmp.6 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp.7 = icmp sgt i32 %tmp.6, 0		; <i1> [#uses=2]
+	%tmp.8 = zext i1 %tmp.7 to i32		; <i32> [#uses=0]
+	br i1 %tmp.7, label %then, label %endif
+then:		; preds = %no_exit
+	%tmp.9 = load i8*, i8** %p_addr		; <i8*> [#uses=1]
+	%tmp.10 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp.11 = sub i32 %tmp.10, 1		; <i32> [#uses=1]
+	%tmp.12 = getelementptr i8, i8* %tmp.9, i32 %tmp.11		; <i8*> [#uses=1]
+	%tmp.13 = load i32, i32* %out		; <i32> [#uses=1]
+	%tmp.14 = trunc i32 %tmp.13 to i8		; <i8> [#uses=1]
+	store i8 %tmp.14, i8* %tmp.12		; the only store through the %p argument; the others all target the allocas above
+	br label %endif
+endif:		; preds = %then, %no_exit
+	%tmp.15 = load i32, i32* %i		; <i32> [#uses=1]
+	%inc = add i32 %tmp.15, 1		; <i32> [#uses=1]
+	store i32 %inc, i32* %i
+	br label %loopentry
+return:		; preds = %loopentry
+	ret void
+}

Added: llvm/trunk/test/Transforms/Mem2Reg/2005-11-28-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2005-11-28-Crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2005-11-28-Crash.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2005-11-28-Crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,62 @@
+; RUN: opt < %s -mem2reg -disable-output
+; PR670
+
+define void @printk(i32, ...) {
+entry:
+	%flags = alloca i32		; <i32*> [#uses=2] -- one store (endif.0) and one load (then.4)
+	br i1 false, label %then.0, label %endif.0
+then.0:		; preds = %entry
+	br label %endif.0
+endif.0:		; preds = %then.0, %entry
+	store i32 0, i32* %flags
+	br label %loopentry
+loopentry:		; preds = %endif.3, %endif.0
+	br i1 false, label %no_exit, label %loopexit
+no_exit:		; preds = %loopentry
+	br i1 false, label %then.1, label %endif.1
+then.1:		; preds = %no_exit
+	br i1 false, label %shortcirc_done.0, label %shortcirc_next.0
+shortcirc_next.0:		; preds = %then.1
+	br label %shortcirc_done.0
+shortcirc_done.0:		; preds = %shortcirc_next.0, %then.1
+	br i1 false, label %shortcirc_done.1, label %shortcirc_next.1
+shortcirc_next.1:		; preds = %shortcirc_done.0
+	br label %shortcirc_done.1
+shortcirc_done.1:		; preds = %shortcirc_next.1, %shortcirc_done.0
+	br i1 false, label %shortcirc_done.2, label %shortcirc_next.2
+shortcirc_next.2:		; preds = %shortcirc_done.1
+	br label %shortcirc_done.2
+shortcirc_done.2:		; preds = %shortcirc_next.2, %shortcirc_done.1
+	br i1 false, label %then.2, label %endif.2
+then.2:		; preds = %shortcirc_done.2
+	br label %endif.2
+endif.2:		; preds = %then.2, %shortcirc_done.2
+	br label %endif.1
+endif.1:		; preds = %endif.2, %no_exit
+	br i1 false, label %then.3, label %endif.3
+then.3:		; preds = %endif.1
+	br label %endif.3
+endif.3:		; preds = %then.3, %endif.1
+	br label %loopentry
+loopexit:		; preds = %loopentry
+	br label %endif.4
+then.4:		; No predecessors!
+	%tmp.61 = load i32, i32* %flags		; <i32> [#uses=0] -- the only load of %flags, sitting in a block with no predecessors
+	br label %out
+dead_block_after_goto:		; No predecessors!
+	br label %endif.4
+endif.4:		; preds = %dead_block_after_goto, %loopexit
+	br i1 false, label %then.5, label %else
+then.5:		; preds = %endif.4
+	br label %endif.5
+else:		; preds = %endif.4
+	br label %endif.5
+endif.5:		; preds = %else, %then.5
+	br label %out
+out:		; preds = %endif.5, %then.4
+	br label %return
+after_ret:		; No predecessors!
+	br label %return
+return:		; preds = %after_ret, %out
+	ret void
+}

Added: llvm/trunk/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt < %s -O3 -S | grep volatile | count 3
+; PR1520
+; Don't promote volatile loads/stores. This is really needed to handle setjmp/longjmp properly.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+	%struct.__jmp_buf_tag = type { [6 x i32], i32, %struct.__sigset_t }
+	%struct.__sigset_t = type { [32 x i32] }
+@j = external global [1 x %struct.__jmp_buf_tag]		; <[1 x %struct.__jmp_buf_tag]*> [#uses=1] -- buffer handed to @_setjmp in @f
+
+define i32 @f() {
+entry:
+	%retval = alloca i32, align 4		; <i32*> [#uses=2]
+	%v = alloca i32, align 4		; <i32*> [#uses=3] -- all three uses are volatile accesses
+	%tmp = alloca i32, align 4		; <i32*> [#uses=3]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store volatile i32 0, i32* %v, align 4
+	%tmp1 = call i32 @_setjmp( %struct.__jmp_buf_tag* getelementptr ([1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* @j, i32 0, i32 0) )		; <i32> [#uses=1]
+	%tmp2 = icmp ne i32 %tmp1, 0		; <i1> [#uses=1]
+	%tmp23 = zext i1 %tmp2 to i8		; <i8> [#uses=1]
+	%toBool = icmp ne i8 %tmp23, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %bb, label %bb5
+
+bb:		; preds = %entry -- taken when _setjmp returned non-zero
+	%tmp4 = load volatile i32, i32* %v, align 4		; <i32> [#uses=1]
+	store i32 %tmp4, i32* %tmp, align 4
+	br label %bb6
+
+bb5:		; preds = %entry -- taken when _setjmp returned zero
+	store volatile i32 1, i32* %v, align 4
+	call void @g( )
+	store i32 0, i32* %tmp, align 4
+	br label %bb6
+
+bb6:		; preds = %bb5, %bb
+	%tmp7 = load i32, i32* %tmp, align 4		; <i32> [#uses=1]
+	store i32 %tmp7, i32* %retval, align 4
+	br label %return
+
+return:		; preds = %bb6
+	%retval8 = load i32, i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval8
+}
+
+declare i32 @_setjmp(%struct.__jmp_buf_tag*) returns_twice
+
+declare void @g()

Added: llvm/trunk/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/ConvertDebugInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/ConvertDebugInfo.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/ConvertDebugInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+define double @testfunc(i32 %i, double %j) nounwind ssp !dbg !1 {
+entry:
+  %i_addr = alloca i32                            ; <i32*> [#uses=2]
+  %j_addr = alloca double                         ; <double*> [#uses=2]
+  %retval = alloca double                         ; <double*> [#uses=2]
+  %0 = alloca double                              ; <double*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  call void @llvm.dbg.declare(metadata i32* %i_addr, metadata !0, metadata !DIExpression()), !dbg !8   ; describes variable "i" via its stack slot
+; CHECK: call void @llvm.dbg.value(metadata i32 %i, metadata ![[IVAR:[0-9]*]], metadata {{.*}})
+; CHECK: call void @llvm.dbg.value(metadata double %j, metadata ![[JVAR:[0-9]*]], metadata {{.*}})
+; CHECK: ![[IVAR]] = !DILocalVariable(name: "i"
+; CHECK: ![[JVAR]] = !DILocalVariable(name: "j"
+  store i32 %i, i32* %i_addr
+  call void @llvm.dbg.declare(metadata double* %j_addr, metadata !9, metadata !DIExpression()), !dbg !8   ; describes variable "j" via its stack slot
+  store double %j, double* %j_addr
+  %1 = load i32, i32* %i_addr, align 4, !dbg !10       ; <i32> [#uses=1]
+  %2 = add nsw i32 %1, 1, !dbg !10                ; <i32> [#uses=1]
+  %3 = sitofp i32 %2 to double, !dbg !10          ; <double> [#uses=1]
+  %4 = load double, double* %j_addr, align 8, !dbg !10    ; <double> [#uses=1]
+  %5 = fadd double %3, %4, !dbg !10               ; <double> [#uses=1]
+  store double %5, double* %0, align 8, !dbg !10
+  %6 = load double, double* %0, align 8, !dbg !10         ; <double> [#uses=1]
+  store double %6, double* %retval, align 8, !dbg !10
+  br label %return, !dbg !10
+
+return:                                           ; preds = %entry
+  %retval1 = load double, double* %retval, !dbg !10       ; <double> [#uses=1]
+  ret double %retval1, !dbg !10
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!14}
+
+!0 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !1, file: !2, type: !7)
+!1 = distinct !DISubprogram(name: "testfunc", linkageName: "testfunc", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, scopeLine: 2, file: !12, scope: !2, type: !4)
+!2 = !DIFile(filename: "testfunc.c", directory: "/tmp")
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: FullDebug, file: !12, enums: !13, retainedTypes: !13)
+!4 = !DISubroutineType(types: !5)
+!5 = !{!6, !7, !6}
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DILocation(line: 2, scope: !1)
+!9 = !DILocalVariable(name: "j", line: 2, arg: 2, scope: !1, file: !2, type: !6)
+!10 = !DILocation(line: 3, scope: !11)
+!11 = distinct !DILexicalBlock(line: 2, column: 0, file: !12, scope: !1)
+!12 = !DIFile(filename: "testfunc.c", directory: "/tmp")
+!13 = !{}
+!14 = !{i32 1, !"Debug Info Version", i32 3}

Added: llvm/trunk/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt -S -mem2reg <%s | FileCheck %s
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+declare void @foo(i32, i64, i8*)
+
+define void @baz(i32 %a) nounwind ssp !dbg !1 {
+; CHECK-LABEL:  entry:
+; CHECK-NEXT:     %"alloca point" = bitcast i32 0 to i32{{$}}
+; CHECK-NEXT:     call void @llvm.dbg.value(metadata i32 %a,{{.*}}, !dbg
+; CHECK-NEXT:     call void @llvm.dbg.value(metadata i32 %a,{{.*}}, !dbg
+; CHECK-NEXT:     call void @llvm.dbg.value(metadata i64 55,{{.*}}, !dbg
+; CHECK-NEXT:     call void @llvm.dbg.value(metadata i8* bitcast (void (i32)* @baz to i8*),{{.*}}, !dbg
+; CHECK-NEXT:     call void @foo({{.*}}, !dbg
+; CHECK-NEXT:     br label %return, !dbg
+entry:
+  %x_addr.i = alloca i32                          ; <i32*> [#uses=2]
+  %y_addr.i = alloca i64                          ; <i64*> [#uses=2]
+  %z_addr.i = alloca i8*                          ; <i8**> [#uses=2]
+  %a_addr = alloca i32                            ; <i32*> [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  call void @llvm.dbg.declare(metadata i32* %a_addr, metadata !0, metadata !DIExpression()), !dbg !7   ; variable "a" of baz
+  store i32 %a, i32* %a_addr
+  %0 = load i32, i32* %a_addr, align 4, !dbg !8        ; <i32> [#uses=1]
+  call void @llvm.dbg.declare(metadata i32* %x_addr.i, metadata !9, metadata !DIExpression()) nounwind, !dbg !15   ; "x" of bar; location !15 carries inlinedAt
+  store i32 %0, i32* %x_addr.i
+  call void @llvm.dbg.declare(metadata i64* %y_addr.i, metadata !16, metadata !DIExpression()) nounwind, !dbg !15   ; "y" of bar
+  store i64 55, i64* %y_addr.i
+  call void @llvm.dbg.declare(metadata i8** %z_addr.i, metadata !17, metadata !DIExpression()) nounwind, !dbg !15   ; "z" of bar
+  store i8* bitcast (void (i32)* @baz to i8*), i8** %z_addr.i
+  %1 = load i32, i32* %x_addr.i, align 4, !dbg !18     ; <i32> [#uses=1]
+  %2 = load i64, i64* %y_addr.i, align 8, !dbg !18     ; <i64> [#uses=1]
+  %3 = load i8*, i8** %z_addr.i, align 8, !dbg !18     ; <i8*> [#uses=1]
+  call void @foo(i32 %1, i64 %2, i8* %3) nounwind, !dbg !18
+  br label %return, !dbg !19
+
+; CHECK-LABEL:  return:
+; CHECK-NEXT:     ret void, !dbg
+return:                                           ; preds = %entry
+  ret void, !dbg !19
+}
+
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!22}
+!0 = !DILocalVariable(name: "a", line: 8, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "baz", linkageName: "baz", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, scopeLine: 8, file: !20, scope: !2, type: !4)
+!2 = !DIFile(filename: "bar.c", directory: "/tmp/")
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: FullDebug, file: !20, enums: !21, retainedTypes: !21)
+!4 = !DISubroutineType(types: !5)
+!5 = !{null, !6}
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !DILocation(line: 8, scope: !1)
+!8 = !DILocation(line: 9, scope: !1)
+!9 = !DILocalVariable(name: "x", line: 4, arg: 1, scope: !10, file: !2, type: !6)
+!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, scopeLine: 4, file: !20, scope: !2, type: !11)
+!11 = !DISubroutineType(types: !12)
+!12 = !{null, !6, !13, !14}
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !20, scope: !2, baseType: null)
+!15 = !DILocation(line: 4, scope: !10, inlinedAt: !8)
+!16 = !DILocalVariable(name: "y", line: 4, arg: 2, scope: !10, file: !2, type: !13)
+!17 = !DILocalVariable(name: "z", line: 4, arg: 3, scope: !10, file: !2, type: !14)
+!18 = !DILocation(line: 5, scope: !10, inlinedAt: !8)
+!19 = !DILocation(line: 10, scope: !1)
+!20 = !DIFile(filename: "bar.c", directory: "/tmp/")
+!21 = !{}
+!22 = !{i32 1, !"Debug Info Version", i32 3}

Added: llvm/trunk/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/PromoteMemToRegister.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/PromoteMemToRegister.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/PromoteMemToRegister.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; Simple sanity check testcase.  Both allocas should be eliminated.
+; RUN: opt < %s -debugify -mem2reg -check-debugify -S 2>&1 | FileCheck %s
+
+; CHECK-NOT: alloca
+; CHECK: CheckModuleDebugify: PASS
+
+define double @testfunc(i32 %i, double %j) {
+	%I = alloca i32		; <i32*> [#uses=4]
+	%J = alloca double		; <double*> [#uses=2]
+	store i32 %i, i32* %I
+	store double %j, double* %J
+	%t1 = load i32, i32* %I		; <i32> [#uses=1]
+	%t2 = add i32 %t1, 1		; <i32> [#uses=1]
+	store i32 %t2, i32* %I
+	%t3 = load i32, i32* %I		; <i32> [#uses=1] -- reads back the %t2 stored on the previous line
+	%t4 = sitofp i32 %t3 to double		; <double> [#uses=1]
+	%t5 = load double, double* %J		; <double> [#uses=1]
+	%t6 = fmul double %t4, %t5		; <double> [#uses=1]
+	ret double %t6
+}
+

Added: llvm/trunk/test/Transforms/Mem2Reg/UndefValuesMerge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/UndefValuesMerge.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/UndefValuesMerge.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/UndefValuesMerge.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,13 @@
+; RUN: opt < %s -mem2reg -S | not grep phi
+
+define i32 @testfunc(i1 %C, i32 %i, i8 %j) {
+	%I = alloca i32		; <i32*> [#uses=2]
+	br i1 %C, label %T, label %Cont
+T:		; preds = %0
+	store i32 %i, i32* %I
+	br label %Cont
+Cont:		; preds = %T, %0 -- %I has been stored only on the path through %T
+	%Y = load i32, i32* %I		; <i32> [#uses=1]
+	ret i32 %Y
+}
+

Added: llvm/trunk/test/Transforms/Mem2Reg/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/atomic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/atomic.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/atomic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,12 @@
+; RUN: opt -mem2reg < %s -S | FileCheck %s
+
+; mem2reg is allowed with arbitrary atomic operations (although we only support
+; it for atomic load and store at the moment).
+define i32 @test1(i32 %x) {
+; CHECK-LABEL: @test1(
+; CHECK: ret i32 %x
+  %a = alloca i32
+  store atomic i32 %x, i32* %a seq_cst, align 4   ; atomic store into a function-local alloca
+  %r = load atomic i32, i32* %a seq_cst, align 4  ; atomic reload; the test expects %x itself to be returned
+  ret i32 %r
+}

Added: llvm/trunk/test/Transforms/Mem2Reg/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/crash.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt < %s -mem2reg -S
+; PR5023
+
+declare i32 @test1f()
+
+define i32 @test1() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+  %whichFlag = alloca i32
+  %A = invoke i32 @test1f()
+          to label %invcont2 unwind label %lpad86
+
+invcont2:
+  store i32 %A, i32* %whichFlag   ; the only store to %whichFlag; it happens on the normal-return path only
+  br label %bb15
+
+bb15:
+  %B = load i32, i32* %whichFlag  ; reached from invcont2 (after the store) and from lpad86 (no store)
+  ret i32 %B
+
+lpad86:
+  %exn = landingpad {i8*, i32}
+           cleanup
+  br label %bb15
+  
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+
+define i32 @test2() {
+entry:
+  %whichFlag = alloca i32
+  br label %bb15
+
+bb15:
+  %B = load i32, i32* %whichFlag  ; on the path from entry nothing has been stored to %whichFlag
+  ret i32 %B
+
+invcont2:                         ; nothing in this function branches to this block
+  %C = load i32, i32* %whichFlag
+  store i32 %C, i32* %whichFlag   ; writes back the value just loaded from the same slot
+  br label %bb15
+}
+

Added: llvm/trunk/test/Transforms/Mem2Reg/dbg-addr-inline-dse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/dbg-addr-inline-dse.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/dbg-addr-inline-dse.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/dbg-addr-inline-dse.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,94 @@
+; RUN: opt -mem2reg -S < %s | FileCheck %s -implicit-check-not="call void @llvm.dbg.addr"
+
+; This example is intended to simulate this pass pipeline, which may not exist
+; in practice:
+; 1. DSE f from the original C source
+; 2. Inline escape
+; 3. mem2reg
+; This exercises the corner case of multiple llvm.dbg.addr intrinsics.
+
+; C source:
+;
+; void escape(int *px) { ++*px; }
+; extern int global;
+; void f(int x) {
+;   escape(&x);
+;   x = 1; // DSE should delete and insert dbg.value(i32 1)
+;   global = x;
+;   x = 2; // DSE should insert dbg.addr
+;   escape(&x);
+; }
+
+; ModuleID = 'dse.c'
+source_filename = "dse.c"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+declare void @llvm.dbg.addr(metadata, metadata, metadata) #2
+declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+
+@global = external global i32, align 4
+
+; Function Attrs: nounwind uwtable
+define void @f(i32 %x) #0 !dbg !8 {               ; hand-written post-DSE, post-inlining form of f() from the C source above
+entry:
+  %x.addr = alloca i32, align 4                    ; slot for x; referenced by both dbg.addr calls below
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.addr(metadata i32* %x.addr, metadata !13, metadata !DIExpression()), !dbg !18   ; first dbg.addr for variable !13
+  %ld.1 = load i32, i32* %x.addr, align 4, !dbg !19    ; load/add/store: first inlined escape(&x), i.e. ++x
+  %inc.1 = add nsw i32 %ld.1, 1, !dbg !19
+  store i32 %inc.1, i32* %x.addr, align 4, !dbg !19
+  call void @llvm.dbg.value(metadata i32 1, metadata !13, metadata !DIExpression()), !dbg !20   ; stands in for the deleted `x = 1` store
+  store i32 1, i32* @global, align 4, !dbg !22          ; global = x, with x already known to be 1
+  call void @llvm.dbg.addr(metadata i32* %x.addr, metadata !13, metadata !DIExpression()), !dbg !23   ; second dbg.addr for the same variable
+  store i32 2, i32* %x.addr, align 4, !dbg !23          ; x = 2
+  %ld.2 = load i32, i32* %x.addr, align 4, !dbg !19    ; load/add/store: second inlined escape(&x)
+  %inc.2 = add nsw i32 %ld.2, 1, !dbg !19
+  store i32 %inc.2, i32* %x.addr, align 4, !dbg !19
+  ret void, !dbg !25
+}
+
+; CHECK-LABEL: define void @f(i32 %x)
+; CHECK: call void @llvm.dbg.value(metadata i32 %x, metadata !13, metadata !DIExpression())
+; CHECK: %inc.1 = add nsw i32 %x, 1
+; CHECK: call void @llvm.dbg.value(metadata i32 %inc.1, metadata !13, metadata !DIExpression())
+; CHECK: call void @llvm.dbg.value(metadata i32 1, metadata !13, metadata !DIExpression())
+; CHECK: store i32 1, i32* @global, align 4
+; CHECK: call void @llvm.dbg.value(metadata i32 2, metadata !13, metadata !DIExpression())
+; CHECK: %inc.2 = add nsw i32 2, 1
+; CHECK: call void @llvm.dbg.value(metadata i32 %inc.2, metadata !13, metadata !DIExpression())
+; CHECK: ret void
+
+attributes #0 = { nounwind uwtable }
+attributes #2 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "dse.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!"clang version 6.0.0 "}
+!8 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 3, type: !9, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null, !11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !{!13}
+!13 = !DILocalVariable(name: "x", arg: 1, scope: !8, file: !1, line: 3, type: !11)
+!14 = !{!15, !15, i64 0}
+!15 = !{!"int", !16, i64 0}
+!16 = !{!"omnipotent char", !17, i64 0}
+!17 = !{!"Simple C/C++ TBAA"}
+!18 = !DILocation(line: 3, column: 12, scope: !8)
+!19 = !DILocation(line: 4, column: 3, scope: !8)
+!20 = !DILocation(line: 5, column: 5, scope: !8)
+!21 = !DILocation(line: 6, column: 12, scope: !8)
+!22 = !DILocation(line: 6, column: 10, scope: !8)
+!23 = !DILocation(line: 7, column: 5, scope: !8)
+!24 = !DILocation(line: 8, column: 3, scope: !8)
+!25 = !DILocation(line: 9, column: 1, scope: !8)

Added: llvm/trunk/test/Transforms/Mem2Reg/dbg-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/dbg-addr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/dbg-addr.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/dbg-addr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,91 @@
+; RUN: opt -mem2reg -S < %s | FileCheck %s
+
+; ModuleID = 'newvars.c'
+source_filename = "newvars.c"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+; Function Attrs: nounwind uwtable
+define i32 @if_else(i32 %cond, i32 %a, i32 %b) !dbg !8 {   ; returns 0 when %cond != 0, otherwise %b
+entry:
+  %x = alloca i32, align 4                         ; slot described by the dbg.addr below (variable !16)
+  call void @llvm.dbg.addr(metadata i32* %x, metadata !16, metadata !DIExpression()), !dbg !26
+  store i32 %a, i32* %x, align 4, !dbg !26, !tbaa !17   ; initial value; overwritten on both branches
+  %tobool = icmp ne i32 %cond, 0, !dbg !28
+  br i1 %tobool, label %if.then, label %if.else, !dbg !30
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* %x, align 4, !dbg !31, !tbaa !17    ; taken when %cond != 0
+  br label %if.end, !dbg !33
+
+if.else:                                          ; preds = %entry
+  store i32 %b, i32* %x, align 4, !dbg !36, !tbaa !17   ; taken when %cond == 0
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %rv = load i32, i32* %x, align 4, !dbg !37, !tbaa !17 ; reads the value stored by whichever branch ran
+  ret i32 %rv, !dbg !39
+}
+
+; CHECK-LABEL: define i32 @if_else({{.*}})
+; CHECK: entry:
+; CHECK-NOT:   alloca i32
+; CHECK:   call void @llvm.dbg.value(metadata i32 %a, metadata ![[X_LOCAL:[0-9]+]], metadata !DIExpression())
+; CHECK: if.then:                                          ; preds = %entry
+; CHECK:   call void @llvm.dbg.value(metadata i32 0, metadata ![[X_LOCAL]], metadata !DIExpression())
+; CHECK: if.else:                                          ; preds = %entry
+; CHECK:   call void @llvm.dbg.value(metadata i32 %b, metadata ![[X_LOCAL]], metadata !DIExpression())
+; CHECK: if.end:                                           ; preds = %if.else, %if.then
+; CHECK:   %[[PHI:[^ ]*]] = phi i32 [ 0, %if.then ], [ %b, %if.else ]
+; CHECK:   call void @llvm.dbg.value(metadata i32 %[[PHI]], metadata ![[X_LOCAL]], metadata !DIExpression())
+; CHECK:   ret i32
+
+; CHECK: ![[X_LOCAL]] = !DILocalVariable(name: "x", {{.*}})
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.dbg.addr(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "newvars.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!"clang version 6.0.0 "}
+!8 = distinct !DISubprogram(name: "if_else", scope: !1, file: !1, line: 1, type: !9, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11, !11, !11, !11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !{!13, !14, !15, !16}
+!13 = !DILocalVariable(name: "b", arg: 3, scope: !8, file: !1, line: 1, type: !11)
+!14 = !DILocalVariable(name: "a", arg: 2, scope: !8, file: !1, line: 1, type: !11)
+!15 = !DILocalVariable(name: "cond", arg: 1, scope: !8, file: !1, line: 1, type: !11)
+!16 = !DILocalVariable(name: "x", scope: !8, file: !1, line: 2, type: !11)
+!17 = !{!18, !18, i64 0}
+!18 = !{!"int", !19, i64 0}
+!19 = !{!"omnipotent char", !20, i64 0}
+!20 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 1, column: 34, scope: !8)
+!23 = !DILocation(line: 1, column: 27, scope: !8)
+!24 = !DILocation(line: 1, column: 17, scope: !8)
+!25 = !DILocation(line: 2, column: 3, scope: !8)
+!26 = !DILocation(line: 2, column: 7, scope: !8)
+!27 = !DILocation(line: 2, column: 11, scope: !8)
+!28 = !DILocation(line: 3, column: 7, scope: !29)
+!29 = distinct !DILexicalBlock(scope: !8, file: !1, line: 3, column: 7)
+!30 = !DILocation(line: 3, column: 7, scope: !8)
+!31 = !DILocation(line: 4, column: 7, scope: !32)
+!32 = distinct !DILexicalBlock(scope: !29, file: !1, line: 3, column: 13)
+!33 = !DILocation(line: 5, column: 3, scope: !32)
+!34 = !DILocation(line: 6, column: 9, scope: !35)
+!35 = distinct !DILexicalBlock(scope: !29, file: !1, line: 5, column: 10)
+!36 = !DILocation(line: 6, column: 7, scope: !35)
+!37 = !DILocation(line: 8, column: 10, scope: !8)
+!38 = !DILocation(line: 9, column: 1, scope: !8)
+!39 = !DILocation(line: 8, column: 3, scope: !8)

Added: llvm/trunk/test/Transforms/Mem2Reg/dbg-inline-scope-for-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/dbg-inline-scope-for-phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/dbg-inline-scope-for-phi.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/dbg-inline-scope-for-phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,110 @@
+; RUN: opt -S < %s -mem2reg -verify | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.13.0"
+
+; Original source (with some whitespace removed):
+;
+;   extern int *getp();
+;   extern int cond();
+;   int get1() { return *getp(); }
+;   int get2(int *p) { return *p; }
+;   int bug(int *p) {
+;     if (cond()) return get1();
+;     else return get2(p);
+;   }
+
+define i32 @get1() !dbg !8 {                      ; IR for `int get1() { return *getp(); }`; contains no alloca
+  %1 = call i32* (...) @getp(), !dbg !12
+  %2 = load i32, i32* %1, align 4, !dbg !13         ; dereference the pointer returned by getp()
+  ret i32 %2, !dbg !14
+}
+
+declare i32* @getp(...)
+
+define i32 @get2(i32*) !dbg !15 {                 ; IR for `int get2(int *p) { return *p; }`
+  %2 = alloca i32*, align 8                        ; slot holding the unnamed pointer argument %0
+  store i32* %0, i32** %2, align 8
+  call void @llvm.dbg.declare(metadata i32** %2, metadata !19, metadata !DIExpression()), !dbg !20
+  %3 = load i32*, i32** %2, align 8, !dbg !21       ; reload the pointer from its slot
+  %4 = load i32, i32* %3, align 4, !dbg !22         ; dereference it
+  ret i32 %4, !dbg !23
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+; CHECK-LABEL: define i32 @bug
+define i32 @bug(i32*) !dbg !24 {                  ; returns get1() when cond() is non-zero, otherwise get2(p)
+  %2 = alloca i32, align 4                         ; result slot: stored in blocks %6 and %8, loaded in %11
+  %3 = alloca i32*, align 8                        ; slot holding the unnamed pointer argument %0
+  store i32* %0, i32** %3, align 8
+  call void @llvm.dbg.declare(metadata i32** %3, metadata !25, metadata !DIExpression()), !dbg !26
+  %4 = call i32 (...) @cond(), !dbg !27
+  %5 = icmp ne i32 %4, 0, !dbg !27
+  br i1 %5, label %6, label %8, !dbg !29
+
+; <label>:6:                                      ; preds = %1
+  %7 = call i32 @get1(), !dbg !30
+  store i32 %7, i32* %2, align 4, !dbg !31          ; this store carries location !31
+  br label %11, !dbg !31
+
+; <label>:8:                                      ; preds = %1
+  %9 = load i32*, i32** %3, align 8, !dbg !32
+  %10 = call i32 @get2(i32* %9), !dbg !33
+  store i32 %10, i32* %2, align 4, !dbg !34         ; this store carries location !34
+  br label %11, !dbg !34
+
+; <label>:11:                                     ; preds = %8, %6
+  %12 = load i32, i32* %2, align 4, !dbg !35        ; the checks below expect a phi feeding the ret in place of this load
+  ret i32 %12, !dbg !35
+
+  ; CHECK: [[phi:%.*]] = phi i32 [ {{.*}} ], [ {{.*}} ], !dbg [[mergedLoc:![0-9]+]]
+  ; CHECK-NEXT: ret i32 [[phi]], !dbg [[retLoc:![0-9]+]]
+}
+
+; CHECK: [[commonScope:![0-9]+]] = distinct !DILexicalBlock(scope: {{.*}}, file: !1, line: 15, column: 7)
+; CHECK: [[mergedLoc]] = !DILocation(line: 0, scope: [[commonScope]])
+; CHECK: [[retLoc]] = !DILocation(line: 23, column: 1
+
+declare i32 @cond(...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Apple LLVM version 9.1.0 (clang-902.2.37.2)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "bug.c", directory: "/bug")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!"Apple LLVM version 9.1.0 (clang-902.2.37.2)"}
+!8 = distinct !DISubprogram(name: "get1", scope: !1, file: !1, line: 6, type: !9, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: false, unit: !0, retainedNodes: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !DILocation(line: 7, column: 11, scope: !8)
+!13 = !DILocation(line: 7, column: 10, scope: !8)
+!14 = !DILocation(line: 7, column: 3, scope: !8)
+!15 = distinct !DISubprogram(name: "get2", scope: !1, file: !1, line: 10, type: !16, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!16 = !DISubroutineType(types: !17)
+!17 = !{!11, !18}
+!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!19 = !DILocalVariable(name: "p", arg: 1, scope: !15, file: !1, line: 10, type: !18)
+!20 = !DILocation(line: 10, column: 15, scope: !15)
+!21 = !DILocation(line: 11, column: 11, scope: !15)
+!22 = !DILocation(line: 11, column: 10, scope: !15)
+!23 = !DILocation(line: 11, column: 3, scope: !15)
+!24 = distinct !DISubprogram(name: "bug", scope: !1, file: !1, line: 14, type: !16, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!25 = !DILocalVariable(name: "p", arg: 1, scope: !24, file: !1, line: 14, type: !18)
+!26 = !DILocation(line: 14, column: 14, scope: !24)
+!27 = !DILocation(line: 15, column: 7, scope: !28)
+!28 = distinct !DILexicalBlock(scope: !24, file: !1, line: 15, column: 7)
+!29 = !DILocation(line: 15, column: 7, scope: !24)
+!30 = !DILocation(line: 16, column: 12, scope: !28)
+!31 = !DILocation(line: 16, column: 5, scope: !28)
+!32 = !DILocation(line: 18, column: 17, scope: !28)
+!33 = !DILocation(line: 18, column: 12, scope: !28)
+!34 = !DILocation(line: 18, column: 5, scope: !28)
+!35 = !DILocation(line: 23, column: 1, scope: !24)

Added: llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-phi-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-phi-2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-phi-2.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-phi-2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+source_filename = "bugpoint-output.bc"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+define void @scan() #0 !dbg !12 {                 ; endless loop that adds 1 to an i1 slot on every iteration
+entry:
+  %entry1 = alloca i1, align 8                     ; i1 slot described by the dbg.declare below (variable !18)
+  call void @llvm.dbg.declare(metadata i1* %entry1, metadata !18, metadata !19), !dbg !20
+  store i1 0, i1* %entry1, align 8, !dbg !20        ; initial value; the expected phi shows it as `false`
+  br label %for.cond, !dbg !20
+
+for.cond:                                          ; branches back to itself unconditionally
+; CHECK: %[[PHI:.*]] = phi i1 [ false, %entry ], [ %0, %for.cond ]
+  %entryN = load i1, i1* %entry1, align 8, !dbg !20
+; CHECK: call void @llvm.dbg.value(metadata i1 %[[PHI]],
+; CHECK-SAME:                      metadata !DIExpression())
+  %0 = add i1 %entryN, 1
+; CHECK: %0 = add i1 %[[PHI]], true
+; CHECK: call void @llvm.dbg.value(metadata i1 %0,
+; CHECK-SAME:                      metadata !DIExpression())
+  store i1 %0, i1* %entry1, align 8, !dbg !20       ; value carried into the next iteration
+  br label %for.cond, !dbg !20
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "adrian", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "<stdin>", directory: "/")
+!2 = !{}
+!4 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !13, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !15)
+!13 = !DISubroutineType(types: !14)
+!14 = !{null, !4, !4}
+!15 = !{!18}
+!18 = !DILocalVariable(name: "entry", scope: !12, file: !1, line: 6, type: !4)
+!19 = !DIExpression()
+!20 = !DILocation(line: 6, scope: !12)

Added: llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-phi.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+source_filename = "bugpoint-output.bc"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+define void @scan() #0 !dbg !12 {                 ; endless loop that adds 1 to an i8 slot on every iteration
+entry:
+  %entry1 = alloca i8, align 8                     ; i8 slot described by the dbg.declare below (variable !18)
+  call void @llvm.dbg.declare(metadata i8* %entry1, metadata !18, metadata !19), !dbg !20
+  store i8 0, i8* %entry1, align 8, !dbg !20        ; initial value; appears as the %entry operand of the expected phi
+  br label %for.cond, !dbg !20
+
+for.cond:                                          ; branches back to itself unconditionally
+; CHECK: %[[PHI:.*]] = phi i8 [ 0, %entry ], [ %0, %for.cond ]
+  %entryN = load i8, i8* %entry1, align 8, !dbg !20
+; CHECK: call void @llvm.dbg.value(metadata i8 %[[PHI]],
+; CHECK-SAME:                      metadata !DIExpression())
+  %0 = add i8 %entryN, 1
+; CHECK: %0 = add i8 %[[PHI]], 1
+; CHECK: call void @llvm.dbg.value(metadata i8 %0,
+; CHECK-SAME:                      metadata !DIExpression())
+  store i8 %0, i8* %entry1, align 8, !dbg !20       ; value carried into the next iteration
+  br label %for.cond, !dbg !20
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "adrian", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "<stdin>", directory: "/")
+!2 = !{}
+!4 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !13, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !15)
+!13 = !DISubroutineType(types: !14)
+!14 = !{null, !4, !4}
+!15 = !{!18}
+!18 = !DILocalVariable(name: "entry", scope: !12, file: !1, line: 6, type: !4)
+!19 = !DIExpression()
+!20 = !DILocation(line: 6, scope: !12)

Added: llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-vla-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-vla-1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-vla-1.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-vla-1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,62 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; Testing conversion from dbg.declare to dbg.value when the variable is a VLA.
+;
+; We can't derive the size of the variable simply by looking at the
+; metadata. But we can find out the size by examining the alloca, so we should
+; know that the load/store instructions are referencing the whole variable,
+; and we expect to get dbg.value intrinsics that map %entryN (aka %[[PHI]])
+; and %t0 to the variable allocated as %vla1.
+
+; ModuleID = 'debug-alloca-vla.ll'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @scan() #0 !dbg !4 {                  ; endless increment loop over an array alloca with a constant count
+entry:
+  %vla1 = alloca i32, i32 1, align 8               ; array alloca whose element count is the constant 1
+  call void @llvm.dbg.declare(metadata i32* %vla1, metadata !10, metadata !DIExpression()), !dbg !18
+  br label %for.cond, !dbg !18                      ; no store precedes the loop, hence `undef` in the expected phi
+
+for.cond:                                         ; preds = %for.cond, %entry
+; CHECK: %[[PHI:.*]] = phi i32 [ undef, %entry ], [ %t0, %for.cond ]
+  %entryN = load i32, i32* %vla1, align 8, !dbg !18
+; CHECK: call void @llvm.dbg.value(metadata i32 %[[PHI]],
+; CHECK-SAME:                      metadata !DIExpression())
+  %t0 = add i32 %entryN, 1
+; CHECK: %t0 = add i32 %[[PHI]], 1
+; CHECK: call void @llvm.dbg.value(metadata i32 %t0,
+; CHECK-SAME:                      metadata !DIExpression())
+ store i32 %t0, i32* %vla1, align 8, !dbg !18
+  br label %for.cond, !dbg !18
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "adrian", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "<stdin>", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !{i32 7, !"PIC Level", i32 2}
+!4 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7, !7}
+!7 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "entry", scope: !4, file: !1, line: 6, type: !7)
+!10 = !DILocalVariable(name: "ptr32", scope: !4, file: !1, line: 240, type: !11)
+!11 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, elements: !14)
+!12 = !DIDerivedType(tag: DW_TAG_typedef, name: "__uint32_t", file: !1, line: 41, baseType: !13)
+!13 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!14 = !{!15}
+!15 = !DISubrange(count: !16)
+!16 = !DILocalVariable(name: "__vla_expr", scope: !4, type: !17, flags: DIFlagArtificial)
+!17 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned)
+!18 = !DILocation(line: 6, scope: !4)

Added: llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-vla-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-vla-2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-vla-2.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/debug-alloca-vla-2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,64 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; Testing conversion from dbg.declare to dbg.value when the variable is a VLA.
+;
+; We can't derive the size of the variable since it is a VLA with an unknown
+; number of elements.
+;
+; Verify that we do not get a dbg.value after the phi node (we can't know if
+; the phi node's result describes the whole array or not).  Also verify that we
+; get a dbg.value that says that we do not know the value of the VLA in place
+; of the store (since we do not know which part of the VLA the store is
+; writing to).
+
+; ModuleID = 'debug-alloca-vla.ll'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @scan(i32 %n) #0 !dbg !4 {            ; endless increment loop over an array alloca with a runtime count
+entry:
+  %vla1 = alloca i32, i32 %n, align 8              ; array alloca whose element count is the argument %n
+  call void @llvm.dbg.declare(metadata i32* %vla1, metadata !10, metadata !DIExpression()), !dbg !18
+  br label %for.cond, !dbg !18                      ; no store precedes the loop, hence `undef` in the expected phi
+
+for.cond:                                         ; preds = %for.cond, %entry
+; CHECK: %[[PHI:.*]] = phi i32 [ undef, %entry ], [ %t0, %for.cond ]
+  %entryN = load i32, i32* %vla1, align 8, !dbg !18
+; CHECK-NOT: call void @llvm.dbg.value
+  %t0 = add i32 %entryN, 1
+; CHECK: %t0 = add i32 %[[PHI]], 1
+; CHECK: call void @llvm.dbg.value(metadata i32 undef,
+; CHECK-SAME:                      metadata !DIExpression())
+ store i32 %t0, i32* %vla1, align 8, !dbg !18
+  br label %for.cond, !dbg !18
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "adrian", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "<stdin>", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !{i32 7, !"PIC Level", i32 2}
+!4 = distinct !DISubprogram(name: "scan", scope: !1, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7, !7}
+!7 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "entry", scope: !4, file: !1, line: 6, type: !7)
+!10 = !DILocalVariable(name: "ptr32", scope: !4, file: !1, line: 240, type: !11)
+!11 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, elements: !14)
+!12 = !DIDerivedType(tag: DW_TAG_typedef, name: "__uint32_t", file: !1, line: 41, baseType: !13)
+!13 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!14 = !{!15}
+!15 = !DISubrange(count: !16)
+!16 = !DILocalVariable(name: "__vla_expr", scope: !4, type: !17, flags: DIFlagArtificial)
+!17 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned)
+!18 = !DILocation(line: 6, scope: !4)

Added: llvm/trunk/test/Transforms/Mem2Reg/ignore-lifetime.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/ignore-lifetime.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/ignore-lifetime.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/ignore-lifetime.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
+
+declare void @llvm.lifetime.start.p0i8(i64 %size, i8* nocapture %ptr)
+declare void @llvm.lifetime.end.p0i8(i64 %size, i8* nocapture %ptr)
+
+define void @test1() {                             ; i32 alloca whose only non-store use is a bitcast feeding lifetime markers
+; CHECK: test1
+; CHECK-NOT: alloca
+  %A = alloca i32
+  %B = bitcast i32* %A to i8*                       ; i8* view of %A, passed only to the lifetime intrinsics
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* %B)
+  store i32 1, i32* %A                              ; the stored value is never loaded
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* %B)
+  ret void
+}
+
+define void @test2() {                             ; struct alloca whose only non-store use is a GEP feeding lifetime markers
+; CHECK: test2
+; CHECK-NOT: alloca
+  %A = alloca {i8, i16}
+  %B = getelementptr {i8, i16}, {i8, i16}* %A, i32 0, i32 0   ; address of field 0, passed only to the lifetime intrinsics
+  call void @llvm.lifetime.start.p0i8(i64 2, i8* %B)
+  store {i8, i16} zeroinitializer, {i8, i16}* %A    ; whole-struct store; the value is never loaded
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* %B)
+  ret void
+}

Added: llvm/trunk/test/Transforms/Mem2Reg/optnone.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/optnone.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/optnone.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/optnone.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This function is optnone, so the allocas should not be eliminated.
+
+; CHECK-LABEL: @testfunc
+; CHECK: alloca
+; CHECK: alloca
+define double @testfunc(i32 %i, double %j) optnone noinline {   ; computes (double)(%i + 1) * %j through two stack slots
+	%I = alloca i32		; <i32*> [#uses=4]; slot for %i, expected to remain after the pass
+	%J = alloca double		; <double*> [#uses=2]; slot for %j, expected to remain after the pass
+	store i32 %i, i32* %I
+	store double %j, double* %J
+	%t1 = load i32, i32* %I		; <i32> [#uses=1]
+	%t2 = add i32 %t1, 1		; <i32> [#uses=1]
+	store i32 %t2, i32* %I
+	%t3 = load i32, i32* %I		; <i32> [#uses=1]; reloads the incremented value
+	%t4 = sitofp i32 %t3 to double		; <double> [#uses=1]
+	%t5 = load double, double* %J		; <double> [#uses=1]
+	%t6 = fmul double %t4, %t5		; <double> [#uses=1]
+	ret double %t6
+}

Added: llvm/trunk/test/Transforms/Mem2Reg/pr24179.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/pr24179.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/pr24179.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/pr24179.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; RUN: opt -mem2reg < %s -S | FileCheck %s
+; RUN: opt -passes=mem2reg < %s -S | FileCheck %s
+
+declare i32 @def(i32)
+declare i1 @use(i32)
+
+; Special case of a single-BB alloca does not apply here since the load
+; is affected by the following store. Expect this case to be identified
+; and a PHI node to be created.
+define void @test1() {                             ; load and store of one slot in a single loop block, load first
+; CHECK-LABEL: @test1(
+ entry:
+  %t = alloca i32                                  ; never stored before the loop is entered
+  br label %loop
+
+ loop:
+  %v = load i32, i32* %t                           ; first iteration has no prior store; later ones see %n
+  %c = call i1 @use(i32 %v)
+; CHECK: [[PHI:%.*]] = phi i32 [ undef, %entry ], [ %n, %loop ]
+; CHECK: call i1 @use(i32 [[PHI]])
+  %n = call i32 @def(i32 7)
+  store i32 %n, i32* %t                            ; follows the load in the same block and feeds the next iteration
+  br i1 %c, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+; Same as above, except there is no following store. The alloca should just be
+; replaced with an undef
+define void @test2() {                             ; same loop shape as @test1 but the slot is never stored to
+; CHECK-LABEL: @test2(
+ entry:
+  %t = alloca i32
+  br label %loop
+
+ loop:
+  %v = load i32, i32* %t                           ; no store to %t exists anywhere in this function
+  %c = call i1 @use(i32 %v)
+; CHECK: %c = call i1 @use(i32 undef)
+  br i1 %c, label %loop, label %exit
+
+ exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/Mem2Reg/pr37632-unreachable-list-of-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/pr37632-unreachable-list-of-stores.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/pr37632-unreachable-list-of-stores.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/pr37632-unreachable-list-of-stores.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt -mem2reg < %s -S | FileCheck %s
+
+
+; CHECK-LABEL: void @patatino()
+; CHECK-NEXT: ret void
+
+; CHECK-LABEL: cantreachme:
+; CHECK-NEXT: %dec = add nsw i32 undef, -1
+; CHECK-NEXT: br label %cantreachme
+
+define void @patatino() {                          ; every load and store of %a sits in a block the entry never reaches
+  %a = alloca i32, align 4
+  ret void                                         ; the entry block returns immediately
+cantreachme:                                       ; its only predecessor is itself
+  %dec = add nsw i32 %tmp, -1                      ; uses %tmp, which is defined by the load further down this block
+  store i32 %dec, i32* %a
+  store i32 %tmp, i32* %a                          ; second store to the same slot, directly before the load
+  %tmp = load i32, i32* %a
+  br label %cantreachme
+}

Added: llvm/trunk/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,89 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This tests that mem2reg preserves the !nonnull metadata on loads
+; from allocas that get optimized out.
+
+; Check the case where the alloca in question has a single store.
+define float* @single_store(float** %arg) {
+; CHECK-LABEL: define float* @single_store
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+; The load from %arg carries no !nonnull; only the reload from %buf does.
+  %arg.load = load float*, float** %arg, align 8
+  store float* %arg.load, float** %buf, align 8
+; The check lines expect this reload to disappear (the return uses %arg.load)
+; and its !nonnull to show up as an icmp ne + llvm.assume pair instead.
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store but still within one basic block.
+define float* @single_block(float** %arg) {
+; CHECK-LABEL: define float* @single_block
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+; Second store to %buf in the same block: it overwrites the null stored just
+; above before %buf is reloaded, so the reload sees %arg.load.
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store and also reads and writes in multiple blocks.
+define float* @multi_block(float** %arg) {
+; CHECK-LABEL: define float* @multi_block
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: br label %next
+; CHECK-LABEL: next:
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  br label %next
+next:
+; This store overwrites the null written in entry before %buf is reloaded;
+; the check lines expect the assume to be emitted in this block.
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check that we don't add an assume if it's not
+; necessary i.e. the value is already implied to be nonnull
+define float* @no_assume(float** %arg) {
+; CHECK-LABEL: define float* @no_assume
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: %cn = icmp ne float* %arg.load, null
+; CHECK: br i1 %cn, label %next, label %fin
+; CHECK-LABEL: next:
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret float* %arg.load
+; CHECK-LABEL: fin:
+; CHECK: ret float* null
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  %cn = icmp ne float* %arg.load, null
+; True edge (%arg.load != null) goes to %next, false edge goes to %fin.
+  br i1 %cn, label %next, label %fin
+next:
+; At this point the above nonnull check ensures that
+; the value %arg.load is nonnull in this block and thus
+; we need not add the assume.
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+fin:
+; Only reached on the false edge of %cn; %buf is not touched on this path.
+  ret float* null
+}
+
+!0 = !{}

Added: llvm/trunk/test/Transforms/Mem2Reg/undef-order.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Mem2Reg/undef-order.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Mem2Reg/undef-order.ll (added)
+++ llvm/trunk/test/Transforms/Mem2Reg/undef-order.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+;RUN: opt -mem2reg -S < %s | FileCheck %s
+
+declare i1 @cond()
+
+; Entry branches only to Store1 and Store2. Block1..Block16 have no
+; predecessors but each still branches to Join, so Join has 18 incoming edges:
+; two that carry a stored value and sixteen that carry none.
+define i32 @foo() {
+Entry:
+    %val = alloca i32
+    %c1 = call i1 @cond()
+    br i1 %c1, label %Store1, label %Store2
+Block1:
+    br label %Join
+Block2:
+    br label %Join
+Block3:
+    br label %Join
+Block4:
+    br label %Join
+Block5:
+    br label %Join
+Store1:
+    store i32 1, i32* %val
+    br label %Join
+Block6:
+    br label %Join
+Block7:
+    br label %Join
+Block8:
+    br label %Join
+Block9:
+    br label %Join
+Block10:
+    br label %Join
+Store2:
+    store i32 2, i32* %val
+    br label %Join
+Block11:
+    br label %Join
+Block12:
+    br label %Join
+Block13:
+    br label %Join
+Block14:
+    br label %Join
+Block15:
+    br label %Join
+Block16:
+    br label %Join
+Join:
+; Phi inserted here should have operands appended deterministically
+; The expected order below is Store1, Store2, then Block1..Block16 in numeric
+; order, which differs from the textual order of the blocks above.
+; CHECK: %val.0 = phi i32 [ 1, %Store1 ], [ 2, %Store2 ], [ undef, %Block1 ], [ undef, %Block2 ], [ undef, %Block3 ], [ undef, %Block4 ], [ undef, %Block5 ], [ undef, %Block6 ], [ undef, %Block7 ], [ undef, %Block8 ], [ undef, %Block9 ], [ undef, %Block10 ], [ undef, %Block11 ], [ undef, %Block12 ], [ undef, %Block13 ], [ undef, %Block14 ], [ undef, %Block15 ], [ undef, %Block16 ]
+    %result = load i32, i32* %val
+    ret i32 %result
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep "call.*initialize" | not grep memtmp
+; PR2077
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+%0 = type { x86_fp80, x86_fp80 }
+
+; Stores the all-zero x86_fp80 bit pattern into both fields of the sret
+; aggregate passed in %agg.result.
+define internal fastcc void @initialize(%0* noalias nocapture sret %agg.result) nounwind {
+entry:
+  %agg.result.03 = getelementptr %0, %0* %agg.result, i32 0, i32 0
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+  %agg.result.15 = getelementptr %0, %0* %agg.result, i32 0, i32 1
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+  ret void
+}
+
+declare fastcc x86_fp80 @passed_uninitialized(%0* nocapture) nounwind
+
+; @initialize writes into %memtmp, which is copied to %tmp and then to %z by
+; two 24-byte memcpys before %z is passed to @passed_uninitialized. The grep
+; pipeline on the first line of this test requires that the call to
+; @initialize in the output no longer mentions memtmp.
+define fastcc void @badly_optimized() nounwind {
+entry:
+  %z = alloca %0
+  %tmp = alloca %0
+  %memtmp = alloca %0, align 8
+  call fastcc void @initialize(%0* noalias sret %memtmp)
+  %tmp1 = bitcast %0* %tmp to i8*
+  %memtmp2 = bitcast %0* %memtmp to i8*
+; First copy: %memtmp -> %tmp.
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %tmp1, i8* align 8 %memtmp2, i32 24, i1 false)
+  %z3 = bitcast %0* %z to i8*
+  %tmp4 = bitcast %0* %tmp to i8*
+; Second copy: %tmp -> %z.
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %z3, i8* align 8 %tmp4, i32 24, i1 false)
+  %tmp5 = call fastcc x86_fp80 @passed_uninitialized(%0* %z)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind

Added: llvm/trunk/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy."
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+
+%a = type { i32 }
+%b = type { float }
+
+declare void @g(%a* nocapture)
+
+; %a_var (type %a = { i32 }) is passed to @g, then 4 bytes are copied from it
+; into %b_var (type %b = { float }, align 1) and read back as a float. The
+; first line of this test requires that no memcpy call remains in the output.
+define float @f() {
+entry:
+  %a_var = alloca %a
+  %b_var = alloca %b, align 1
+  call void @g(%a* %a_var)
+  %a_i8 = bitcast %a* %a_var to i8*
+  %b_i8 = bitcast %b* %b_var to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i1 false)
+  %tmp1 = getelementptr %b, %b* %b_var, i32 0, i32 0
+  %tmp2 = load float, float* %tmp1
+  ret float %tmp2
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind

Added: llvm/trunk/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s
+; PR10067
+; Make sure the call+copy isn't optimized in such a way that
+; %ret ends up with the wrong value.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10"
+
+%struct1 = type { i32, i32 }
+%struct2 = type { %struct1, i8* }
+
+declare void @bar(%struct1* nocapture sret %agg.result) nounwind
+
+define i32 @foo() nounwind {
+  %x = alloca %struct1, align 8
+  %y = alloca %struct2, align 8
+  call void @bar(%struct1* sret %x) nounwind
+; CHECK: call void @bar(%struct1* sret %x)
+
+; Zero both i32 fields of the %struct1 member at the start of %y. These
+; stores sit between the call that fills %x and the copy from %x to %y.
+  %gepn1 = getelementptr inbounds %struct2, %struct2* %y, i32 0, i32 0, i32 0
+  store i32 0, i32* %gepn1, align 8
+  %gepn2 = getelementptr inbounds %struct2, %struct2* %y, i32 0, i32 0, i32 1
+  store i32 0, i32* %gepn2, align 4
+
+; Copy %x (two i32 fields, 8 bytes) over the start of %y with a single i64
+; load/store pair.
+  %bit1 = bitcast %struct1* %x to i64*
+  %bit2 = bitcast %struct2* %y to i64*
+  %load = load i64, i64* %bit1, align 8
+  store i64 %load, i64* %bit2, align 8
+
+; CHECK: %load = load i64, i64* %bit1, align 8
+; CHECK: store i64 %load, i64* %bit2, align 8
+
+; %ret reads the first i32 of %y, which was last written by the i64 store
+; above, not by the zero store.
+  %gep1 = getelementptr %struct2, %struct2* %y, i32 0, i32 0, i32 0
+  %ret = load i32, i32* %gep1
+  ret i32 %ret
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/align.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/align.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/align.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+; The resulting memset is only 4-byte aligned, despite containing
+; a 16-byte aligned store in the middle.
+
+define void @foo(i32* %p) {
+; CHECK-LABEL: @foo(
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 16, i1 false)
+; Four zero stores to consecutive i32 slots %p[0..3] cover 16 contiguous
+; bytes. Only the store to %p[1] claims align 16; the others claim align 4.
+  %a0 = getelementptr i32, i32* %p, i64 0
+  store i32 0, i32* %a0, align 4
+  %a1 = getelementptr i32, i32* %p, i64 1
+  store i32 0, i32* %a1, align 16
+  %a2 = getelementptr i32, i32* %p, i64 2
+  store i32 0, i32* %a2, align 4
+  %a3 = getelementptr i32, i32* %p, i64 3
+  store i32 0, i32* %a3, align 4
+  ret void
+}
+
+; Replacing %a8 with %a4 in the memset requires boosting the alignment of %a4.
+
+define void @bar() {
+; CHECK-LABEL: @bar(
+; CHECK: %a4 = alloca i32, align 8
+; CHECK-NOT: memcpy
+; %a4 is declared with align 4 here; the check above expects align 8 in the
+; output.
+  %a4 = alloca i32, align 4
+  %a8 = alloca i32, align 8
+  %a8.cast = bitcast i32* %a8 to i8*
+  %a4.cast = bitcast i32* %a4 to i8*
+; The memset writes 4 zero bytes to %a8 at align 8; the memcpy then copies
+; those 4 bytes into %a4.
+  call void @llvm.memset.p0i8.i64(i8* align 8 %a8.cast, i8 0, i64 4, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a4.cast, i8* align 4 %a8.cast, i64 4, i1 false)
+  ret void
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/atomic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/atomic.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/atomic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; RUN: opt -basicaa -memcpyopt -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; Global that the unordered atomic store in @test2 writes to.
+@x = global i32 0
+
+declare void @otherf(i32*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+; memcpyopt should not touch atomic ops
+define void @test1() nounwind uwtable ssp {
+; CHECK-LABEL: @test1(
+; CHECK: store atomic
+  %x = alloca [101 x i32], align 16
+  %bc = bitcast [101 x i32]* %x to i8*
+; The memset zeroes the first 400 bytes of %x, i.e. elements 0..99.
+  call void @llvm.memset.p0i8.i64(i8* align 16 %bc, i8 0, i64 400, i1 false)
+; Element 100 starts at byte offset 400, directly after the memset range, and
+; is zeroed with an unordered atomic store. The check above requires that the
+; atomic store is still present in the output.
+  %gep1 = getelementptr inbounds [101 x i32], [101 x i32]* %x, i32 0, i32 100
+  store atomic i32 0, i32* %gep1 unordered, align 4
+  %gep2 = getelementptr inbounds [101 x i32], [101 x i32]* %x, i32 0, i32 0
+  call void @otherf(i32* %gep2)
+  ret void
+}
+
+; memcpyopt across unordered store
+define void @test2() nounwind uwtable ssp {
+; CHECK-LABEL: @test2(
+; CHECK: call
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: call
+  %old = alloca i32
+  %new = alloca i32
+  call void @otherf(i32* nocapture %old)
+  store atomic i32 0, i32* @x unordered, align 4
+; The check lines above require the atomic store to sit directly between two
+; calls, so this load/store copy from %old to %new is expected to be absent
+; from the output.
+  %v = load i32, i32* %old
+  store i32 %v, i32* %new
+  call void @otherf(i32* nocapture %new)
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/MemCpyOpt/callslot_aa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/callslot_aa.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/callslot_aa.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/callslot_aa.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%T = type { i64, i64 }
+
+; One byte is copied %src -> %tmp -> %dst by two memcpys; only the first
+; carries !noalias metadata. The check line expects a memcpy straight from
+; %src to %dst.
+define void @test(i8* %src) {
+  %tmp = alloca i8
+  %dst = alloca i8
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !noalias !2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false)
+
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+
+; Check that the noalias for "dst" was removed by checking that the metadata is gone
+; CHECK-NOT: "dst"
+!0 = !{!0}
+!1 = distinct !{!1, !0, !"dst"}
+!2 = distinct !{!1}

Added: llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) unnamed_addr nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+; all bytes of %dst that are touched by the memset are dereferenceable
+define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
+; CHECK-LABEL: @must_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+; %dst is dereferenceable(4096), which equals the 4096-byte length of both
+; the memset of the local buffer and the memcpy into %dst.
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i1 false) #2
+  ret void
+}
+
+; memset touches more bytes than those guaranteed to be dereferenceable
+; We can't remove the memcpy, but we can turn it into an independent memset.
+define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
+; CHECK-LABEL: @must_not_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK: call void @llvm.memset.p0i8.i64
+; %dst is only dereferenceable(1024), while the memset of the local buffer
+; and the memcpy into %dst are both 4096 bytes long. The check lines expect
+; two memset calls in the output.
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i1 false) #2
+  ret void
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/callslot_throw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/callslot_throw.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/callslot_throw.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/callslot_throw.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt -S -memcpyopt < %s | FileCheck %s
+declare void @may_throw(i32* nocapture %x)
+
+; CHECK-LABEL: define void @test1(
+define void @test1(i32* nocapture noalias dereferenceable(4) %x) {
+entry:
+  %t = alloca i32, align 4
+; @may_throw is declared without nounwind and this call site does not add it.
+  call void @may_throw(i32* nonnull %t)
+  %load = load i32, i32* %t, align 4
+  store i32 %load, i32* %x, align 4
+; The check lines below expect the alloca, call, load and store to appear in
+; the output in exactly this order, with the call still taking the alloca.
+; CHECK:       %[[t:.*]] = alloca i32, align 4
+; CHECK-NEXT:  call void @may_throw(i32* {{.*}} %[[t]])
+; CHECK-NEXT:  %[[load:.*]] = load i32, i32* %[[t]], align 4
+; CHECK-NEXT:  store i32 %[[load]], i32* %x, align 4
+  ret void
+}
+
+declare void @always_throws()
+
+; CHECK-LABEL: define void @test2(
+define void @test2(i32* nocapture noalias dereferenceable(4) %x) {
+entry:
+  %t = alloca i32, align 4
+; Unlike @test1, this call site is marked nounwind.
+  call void @may_throw(i32* nonnull %t) nounwind
+  %load = load i32, i32* %t, align 4
+; @always_throws (declared with no attributes) is called between the load
+; from %t and the store to %x.
+  call void @always_throws()
+  store i32 %load, i32* %x, align 4
+; The check lines below expect all five instructions to remain in this order.
+; CHECK:       %[[t:.*]] = alloca i32, align 4
+; CHECK-NEXT:  call void @may_throw(i32* {{.*}} %[[t]])
+; CHECK-NEXT:  %[[load:.*]] = load i32, i32* %[[t]], align 4
+; CHECK-NEXT:  call void @always_throws()
+; CHECK-NEXT:  store i32 %[[load]], i32* %x, align 4
+  ret void
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/capturing-func.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/capturing-func.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/capturing-func.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/capturing-func.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s
+
+target datalayout = "e"
+
+declare void @foo(i8*)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+
+; %ptr2 is passed to @foo, one byte is copied from %ptr2 to %ptr1, and %ptr1
+; is then passed to @foo. @foo's parameter is declared without nocapture.
+define void @test() {
+  %ptr1 = alloca i8
+  %ptr2 = alloca i8
+  call void @foo(i8* %ptr2)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
+  call void @foo(i8* %ptr1)
+  ret void
+
+  ; Check that the transformation isn't applied if the called function can
+  ; capture the pointer argument (i.e. the nocapture attribute isn't present)
+  ; CHECK-LABEL: @test(
+  ; CHECK: call void @foo(i8* %ptr2)
+  ; CHECK-NEXT: call void @llvm.memcpy
+  ; CHECK-NEXT: call void @foo(i8* %ptr1)
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/crash.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt < %s -basicaa -memcpyopt -disable-output
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+%struct.qw = type { [4 x float] }
+%struct.bar = type { %struct.qw, %struct.qw, %struct.qw, %struct.qw, %struct.qw, float, float}
+
+; PR4882
+; Zero-stores to a run of float slots inside %struct.bar: all four elements
+; of members 0 and 1, elements 1..3 of member 3, all four elements of
+; member 4, and the float at member 5. Member 2, element 0 of member 3 and
+; member 6 are not written. The test runs opt with -disable-output, so it
+; checks only that the pass completes on this input.
+define void @test1(%struct.bar* %this) {
+entry:
+  %0 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 0, i32 0, i32 0
+  store float 0.000000e+00, float* %0, align 4
+  %1 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 0, i32 0, i32 1
+  store float 0.000000e+00, float* %1, align 4
+  %2 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 0, i32 0, i32 2
+  store float 0.000000e+00, float* %2, align 4
+  %3 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 0, i32 0, i32 3
+  store float 0.000000e+00, float* %3, align 4
+  %4 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 1, i32 0, i32 0
+  store float 0.000000e+00, float* %4, align 4
+  %5 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 1, i32 0, i32 1
+  store float 0.000000e+00, float* %5, align 4
+  %6 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 1, i32 0, i32 2
+  store float 0.000000e+00, float* %6, align 4
+  %7 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 1, i32 0, i32 3
+  store float 0.000000e+00, float* %7, align 4
+; Gap: member 2 and element 0 of member 3 are skipped here.
+  %8 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 3, i32 0, i32 1
+  store float 0.000000e+00, float* %8, align 4
+  %9 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 3, i32 0, i32 2
+  store float 0.000000e+00, float* %9, align 4
+  %10 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 3, i32 0, i32 3
+  store float 0.000000e+00, float* %10, align 4
+  %11 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 4, i32 0, i32 0
+  store float 0.000000e+00, float* %11, align 4
+  %12 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 4, i32 0, i32 1
+  store float 0.000000e+00, float* %12, align 4
+  %13 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 4, i32 0, i32 2
+  store float 0.000000e+00, float* %13, align 4
+  %14 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 4, i32 0, i32 3
+  store float 0.000000e+00, float* %14, align 4
+  %15 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 5
+  store float 0.000000e+00, float* %15, align 4
+  unreachable
+}
+
+; PR8753
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+; Two 20-byte memcpy calls whose pointer operands are undef or null; %cmd is
+; unused. The test runs opt with -disable-output, so it checks only that the
+; pass completes on this input.
+define void @test2(i32 %cmd) nounwind {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 20, i1 false) nounwind
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* undef, i64 20, i1 false) nounwind
+  ret void
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/fca2memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/fca2memcpy.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/fca2memcpy.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/fca2memcpy.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,129 @@
+; RUN: opt -memcpyopt -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%S = type { i8*, i8, i32 }
+
+; Whole-aggregate load from %src followed by a store to %dst. Neither pointer
+; is marked noalias; the check lines expect the pair to become a memmove.
+define void @copy(%S* %src, %S* %dst) {
+; CHECK-LABEL: copy
+; CHECK-NOT: load
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S %1, %S* %dst
+  ret void
+}
+
+; Same load/store pair as @copy, but %src is marked noalias; the check lines
+; expect a memcpy rather than a memmove.
+define void @noaliassrc(%S* noalias %src, %S* %dst) {
+; CHECK-LABEL: noaliassrc
+; CHECK-NOT: load
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S %1, %S* %dst
+  ret void
+}
+
+; Same load/store pair as @copy, but %dst is marked noalias; the check lines
+; expect a memcpy rather than a memmove.
+define void @noaliasdst(%S* %src, %S* noalias %dst) {
+; CHECK-LABEL: noaliasdst
+; CHECK-NOT: load
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S %1, %S* %dst
+  ret void
+}
+
+; %src is overwritten with zeroinitializer between the load and the store to
+; %dst, and neither pointer is noalias. The check lines expect the load and
+; the final store to stay, with only the zero store turned into a memset.
+define void @destroysrc(%S* %src, %S* %dst) {
+; CHECK-LABEL: destroysrc
+; CHECK: load %S, %S* %src
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NEXT: store %S %1, %S* %dst
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S zeroinitializer, %S* %src
+  store %S %1, %S* %dst
+  ret void
+}
+
+; Same shape as @destroysrc, but %src is noalias. The check lines expect no
+; load to remain: a memcpy comes first, followed by the memset that replaces
+; the zero store.
+define void @destroynoaliassrc(%S* noalias %src, %S* %dst) {
+; CHECK-LABEL: destroynoaliassrc
+; CHECK-NOT: load
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S zeroinitializer, %S* %src
+  store %S %1, %S* %dst
+  ret void
+}
+
+; Two loads of %src followed by two stores to %dst, with no noalias on either
+; pointer. The check lines expect a single load to remain, then a memmove,
+; then one store of that loaded value to %dst.
+define void @copyalias(%S* %src, %S* %dst) {
+; CHECK-LABEL: copyalias
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src
+; CHECK-NOT: load
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
+; CHECK-NEXT: store %S [[LOAD]], %S* %dst
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  %2 = load %S, %S* %src
+  store %S %1, %S* %dst
+  store %S %2, %S* %dst
+  ret void
+}
+
+; If the store address is computed in a complex manner, make
+; sure we lift the computation as well if needed and possible.
+define void @addrproducer(%S* %src, %S* %dst) {
+; CHECK-LABEL: addrproducer(
+; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
+; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i64 1
+; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
+; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S undef, %S* %dst
+; %dst2 is computed here, after the undef store to %dst. In the expected
+; output above, the gep appears before the memmove, and the memmove appears
+; before the memset that replaces the undef store.
+  %dst2 = getelementptr %S , %S* %dst, i64 1
+  store %S %1, %S* %dst2
+  ret void
+}
+
+; Like @addrproducer, but the index for %dst2 is loaded from %dstidptr after
+; the undef store to %dst, and no pointer is noalias. The check lines expect
+; the instruction order to stay as written, with only the undef store turned
+; into a memset.
+define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
+; CHECK-LABEL: aliasaddrproducer(
+; CHECK-NEXT: %[[SRC:[0-9]+]] = load %S, %S* %src
+; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: %dstindex = load i32, i32* %dstidptr
+; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
+; CHECK-NEXT: store %S %[[SRC]], %S* %dst2
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S undef, %S* %dst
+  %dstindex = load i32, i32* %dstidptr
+  %dst2 = getelementptr %S , %S* %dst, i32 %dstindex
+  store %S %1, %S* %dst2
+  ret void
+}
+
+; Here %dst and %dstidptr are both noalias, and the undef store goes to %src
+; rather than %dst. The check lines expect the index load, the or and the gep
+; to appear before a memcpy from %src to %dst2, with the memset that replaces
+; the undef store to %src placed after that memcpy.
+define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) {
+; CHECK-LABEL: noaliasaddrproducer(
+; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
+; CHECK-NEXT: %[[LOADED:[0-9]+]] = load i32, i32* %dstidptr
+; CHECK-NEXT: %dstindex = or i32 %[[LOADED]], 1
+; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
+; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
+; CHECK-NEXT: %[[SRCCAST2:[0-9]+]] = bitcast %S* %src to i8*
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST2]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[SRCCAST]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: ret void
+  %1 = load %S, %S* %src
+  store %S undef, %S* %src
+  %2 = load i32, i32* %dstidptr
+  %dstindex = or i32 %2, 1
+  %dst2 = getelementptr %S , %S* %dst, i32 %dstindex
+  store %S %1, %S* %dst2
+  ret void
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/form-memset.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,301 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+
+; All the stores in this example should be merged into a single memset.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @test1(i8 signext  %c) nounwind  { ; stores %c to each of the 19 bytes of %x (indices 0-18, ascending), then passes %x to @bar
+entry:
+	%x = alloca [19 x i8]		; <[19 x i8]*> [#uses=20]
+	%tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp, align 1
+	%tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp5, align 1
+	%tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp9, align 1
+	%tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp13, align 1
+	%tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp17, align 1
+	%tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp21, align 1
+	%tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp25, align 1
+	%tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp29, align 1
+	%tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp33, align 1
+	%tmp37 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 9		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp37, align 1
+	%tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp41, align 1
+	%tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp45, align 1
+	%tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp49, align 1
+	%tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp53, align 1
+	%tmp57 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp57, align 1
+	%tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp61, align 1
+	%tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 16		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp65, align 1
+	%tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp69, align 1
+	%tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18		; <i8*> [#uses=1]
+	store i8 %c, i8* %tmp73, align 1
+	%tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind ; %x is handed to @bar only after every byte has been stored
+	ret void
+; CHECK-LABEL: @test1(
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NOT: store
+; CHECK: ret
+}
+
+declare i32 @bar(...)
+
+%struct.MV = type { i16, i16 }
+
+
+define void @test2() nounwind  { ; three runs of stores, each written from the last element down to the first: -1 bytes into %ref_idx, then zero i16 fields into %up_mvd and %left_mvd
+entry:
+	%ref_idx = alloca [8 x i8]		; <[8 x i8]*> [#uses=8]
+	%left_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
+	%up_mvd = alloca [8 x %struct.MV]		; <[8 x %struct.MV]*> [#uses=17]
+	%tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp20, align 1
+	%tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp23, align 1
+	%tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp26, align 1
+	%tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp29, align 1
+	%tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp32, align 1
+	%tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp35, align 1
+	%tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1		; <i8*> [#uses=1]
+	store i8 -1, i8* %tmp38, align 1
+	%tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0		; <i8*> [#uses=2]
+	store i8 -1, i8* %tmp41, align 1
+	%tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp43, align 2
+	%tmp46 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp46, align 2
+	%tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp57, align 2
+	%tmp60 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp60, align 2
+	%tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp71, align 2
+	%tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp74, align 2
+	%tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp85, align 2
+	%tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp88, align 2
+	%tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp99, align 2
+	%tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp102, align 2
+	%tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp113, align 2
+	%tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp116, align 2
+	%tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp127, align 2
+	%tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp130, align 2
+	%tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp141, align 8
+	%tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp144, align 2
+	%tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp148, align 2
+	%tmp151 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp151, align 2
+	%tmp162 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp162, align 2
+	%tmp165 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp165, align 2
+	%tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp176, align 2
+	%tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp179, align 2
+	%tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp190, align 2
+	%tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp193, align 2
+	%tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp204, align 2
+	%tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp207, align 2
+	%tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp218, align 2
+	%tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp221, align 2
+	%tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp232, align 2
+	%tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp235, align 2
+	%tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp246, align 8
+	%tmp249 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1		; <i16*> [#uses=1]
+	store i16 0, i16* %tmp249, align 2
+	%up_mvd252 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
+	%left_mvd253 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0		; <%struct.MV*> [#uses=1]
+	call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind 
+	ret void
+        
+; CHECK-LABEL: @test2(
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %tmp41, i8 -1, i64 8, i1 false)
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 32, i1 false)
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %1, i8 0, i64 32, i1 false)
+; CHECK-NOT: store
+; CHECK: ret
+}
+
+declare void @foo(%struct.MV*, %struct.MV*, i8*)
+
+
+; Store followed by memset: a 4-byte zero store at %P[1] sits directly in front of an 11-byte zero memset at %P[2]; one 15-byte memset is expected.
+define void @test3(i32* nocapture %P) nounwind ssp {
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
+  store i32 0, i32* %arrayidx, align 4 ; zeroes bytes 4-7 of %P
+  %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
+  %0 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false) ; zeroes bytes 8-18 of %P
+  ret void
+; CHECK-LABEL: @test3(
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false)
+}
+
+; store followed by memset, different offset scenario: the 4-byte zero store is at %P itself and the 11-byte zero memset starts at %P[1]; one 15-byte memset is expected.
+define void @test4(i32* nocapture %P) nounwind ssp {
+entry:
+  store i32 0, i32* %P, align 4 ; zeroes bytes 0-3 of %P
+  %add.ptr = getelementptr inbounds i32, i32* %P, i64 1
+  %0 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false) ; zeroes bytes 4-14 of %P
+  ret void
+; CHECK-LABEL: @test4(
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false)
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+; Memset followed by store: same byte ranges as @test3 but in the opposite order (memset of bytes 8-18 first, then the store to bytes 4-7); one 15-byte memset is expected.
+define void @test5(i32* nocapture %P) nounwind ssp {
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %P, i64 2
+  %0 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false) ; zeroes bytes 8-18 of %P
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 1
+  store i32 0, i32* %arrayidx, align 4 ; zeroes bytes 4-7 of %P, immediately below the memset range
+  ret void
+; CHECK-LABEL: @test5(
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false)
+}
+
+;; Memset followed by memset: two 12-byte zero memsets are back to back (bytes 0-11 and 12-23 of %P); one 24-byte memset is expected.
+define void @test6(i32* nocapture %P) nounwind ssp {
+entry:
+  %0 = bitcast i32* %P to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i1 false) ; zeroes bytes 0-11 of %P
+  %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
+  %1 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i1 false) ; zeroes bytes 12-23 of %P
+  ret void
+; CHECK-LABEL: @test6(
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %2, i8 0, i64 24, i1 false)
+}
+
+; More aggressive heuristic: five adjacent i32 -1 stores (%c[0] through %c[4]) in a function marked optsize are expected to become one 20-byte memset of -1.
+; rdar://9892684
+define void @test7(i32* nocapture %c) nounwind optsize {
+  store i32 -1, i32* %c, align 4
+  %1 = getelementptr inbounds i32, i32* %c, i32 1
+  store i32 -1, i32* %1, align 4
+  %2 = getelementptr inbounds i32, i32* %c, i32 2
+  store i32 -1, i32* %2, align 4
+  %3 = getelementptr inbounds i32, i32* %c, i32 3
+  store i32 -1, i32* %3, align 4
+  %4 = getelementptr inbounds i32, i32* %c, i32 4
+  store i32 -1, i32* %4, align 4 ; last of the five stores; together they cover bytes 0-19 of %c
+; CHECK-LABEL: @test7(
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %5, i8 -1, i64 20, i1 false)
+  ret void
+}
+
+%struct.test8 = type { [4 x i32] }
+
+define void @test8() { ; a single all-ones <4 x i32> store into a fresh alloca; the expected output still contains this exact store
+entry:
+  %memtmp = alloca %struct.test8, align 16
+  %0 = bitcast %struct.test8* %memtmp to <4 x i32>*
+  store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16 ; the only memory write in the function
+  ret void
+; CHECK-LABEL: @test8(
+; CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
+}
+
+@test9buf = internal unnamed_addr global [16 x i64] zeroinitializer, align 16 ; 128-byte zero-initialized buffer; @test9 overwrites its first 16 bytes
+
+define void @test9() nounwind { ; sixteen i8 -1 stores, one per byte offset 0-15 of @test9buf, with mixed alignments; one 16-byte memset is expected
+  store i8 -1, i8* bitcast ([16 x i64]* @test9buf to i8*), align 16
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 1), align 1
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 2), align 2
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 3), align 1
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 4), align 4
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 5), align 1
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 6), align 2
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 7), align 1
+  store i8 -1, i8* bitcast (i64* getelementptr inbounds ([16 x i64], [16 x i64]* @test9buf, i64 0, i64 1) to i8*), align 8 ; byte offset 8, spelled as element 1 of the i64 array
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 9), align 1
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 10), align 2
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 11), align 1
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 12), align 4
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 13), align 1
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 14), align 2
+  store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1
+  ret void
+; CHECK-LABEL: @test9(
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i1 false)
+}
+
+; PR19092: two zero memsets start at the same pointer (42 bytes, then 23 bytes); exactly one memset, of 42 bytes, is expected in the output.
+define void @test10(i8* nocapture %P) nounwind {
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i1 false) ; zeroes bytes 0-41 of %P
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i1 false) ; zeroes bytes 0-22 again, fully inside the range above
+  ret void
+; CHECK-LABEL: @test10(
+; CHECK-NOT: memset
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 0, i64 42, i1 false)
+; CHECK-NOT: memset
+; CHECK: ret void
+}
+
+; Memset followed by odd store: a 12-byte i96 store made entirely of 0x01 bytes sits in front of an 11-byte memset of 1; one 23-byte memset of 1 is expected.
+define void @test11(i32* nocapture %P) nounwind ssp {
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %P, i64 3
+  %0 = bitcast i32* %add.ptr to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 1, i64 11, i1 false) ; fills bytes 12-22 of %P with 0x01
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 0
+  %arrayidx.cast = bitcast i32* %arrayidx to i96*
+  store i96 310698676526526814092329217, i96* %arrayidx.cast, align 4 ; the constant equals (2^96 - 1) / 255, i.e. twelve 0x01 bytes, written to bytes 0-11
+  ret void
+; CHECK-LABEL: @test11(
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 1, i64 23, i1 false)
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/invariant.start.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/invariant.start.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/invariant.start.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/invariant.start.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; MemCpy optimizations should take place even in presence of invariant.start
+; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+target triple = "i686-apple-darwin9"
+
+%0 = type { x86_fp80, x86_fp80 }
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+
+; FIXME: The invariant.start does not modify %P.
+; The intermediate alloca and one of the memcpy's should be eliminated, the
+; other should be transformed to a memmove. For now the expected output below is the input, unchanged.
+define void @test1(i8* %P, i8* %Q) nounwind  {
+  %memtmp = alloca %0, align 16
+  %R = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) ; first copy: %P into the temporary
+  %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P) ; placed between the two copies and names %P, the source of the first copy
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) ; second copy: the temporary into %Q
+  ret void
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %memtmp = alloca %0, align 16
+; CHECK-NEXT: %R = bitcast %0* %memtmp to i8*
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false)
+; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false)
+; CHECK-NEXT: ret void
+}
+
+
+; The invariant.start intrinsic does not inhibit transforming the memcpy to a
+; memset.
+define void @test2(i8* %dst1, i8* %dst2, i8 %c) {
+; CHECK-LABEL: define void @test2(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst2, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: ret void
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) ; fills 128 bytes of %dst1 with %c
+  %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) ; sits between the memset and the copy that reads %dst1
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 128, i1 false) ; expected output replaces this copy with a memset of %c over %dst2
+  ret void
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/lifetime.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/lifetime.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/lifetime.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/lifetime.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt < %s -O1 -S | FileCheck %s
+
+; performCallSlotOptzn in MemCpy should not exchange the calls to
+; @llvm.lifetime.start and @llvm.memcpy.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+define void @_ZN4CordC2EOS_(i8* nocapture dereferenceable(16) %arg1) { ; expected output has no llvm.lifetime.start call ahead of the return
+bb:
+; CHECK-LABEL: @_ZN4CordC2EOS_
+; CHECK-NOT: call void @llvm.lifetime.start
+; CHECK: ret void
+  %tmp = alloca [8 x i8], align 8 ; NOTE(review): 8-byte alloca, while the lifetime markers and memcpy below use 16 bytes; presumably intentional for this regression, confirm before changing
+  %tmp5 = bitcast [8 x i8]* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp5)
+  %tmp10 = getelementptr inbounds i8, i8* %tmp5, i64 7
+  store i8 0, i8* %tmp10, align 1 ; the only write into the temporary before it is copied out
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %arg1, i8* align 8 %tmp5, i64 16, i1 false) ; copies the temporary into %arg1 between lifetime.start and lifetime.end
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp5)
+  ret void
+}
+
+attributes #1 = { argmemonly nounwind }

Added: llvm/trunk/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -basicaa -scoped-noalias -memcpyopt -S %s | FileCheck %s
+
+%T = type { i8, i32 }
+
+; Ensure load-store forwarding of an aggregate is interpreted as
+; a memmove when the source and dest may alias
+define void @test_memmove(%T* align 8 %a, %T* align 16 %b) {
+; CHECK-LABEL: @test_memmove(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %T* [[B:%.*]] to i8*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %T* [[A:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* align 16 [[TMP1]], i8* align 8 [[TMP2]], i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %val = load %T, %T* %a, align 8 ; neither %a nor %b is noalias, so the two pointers may overlap
+  store %T %val, %T* %b, align 16 ; expected output: this load/store pair becomes one 8-byte memmove from %a to %b
+  ret void
+}
+
+; Ensure load-store forwarding of an aggregate is interpreted as
+; a memcpy when the source and dest do not alias
+define void @test_memcpy(%T* noalias align 8 %a, %T* noalias align 16 %b) {
+; CHECK-LABEL: @test_memcpy(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %T* [[B:%.*]] to i8*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %T* [[A:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP1]], i8* align 8 [[TMP2]], i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %val = load %T, %T* %a, align 8 ; both %a and %b are noalias here, unlike in @test_memmove
+  store %T %val, %T* %b, align 16 ; expected output: this load/store pair becomes one 8-byte memcpy from %a to %b
+  ret void
+}
+
+; memcpy(%d, %a) should not be generated since store2 may-aliases load %a.
+define void @f(%T* %a, %T* %b, %T* %c, %T* %d) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[VAL:%.*]] = load %T, %T* %a, !alias.scope !0
+; CHECK-NEXT:    store %T { i8 23, i32 23 }, %T* %b, !alias.scope !3
+; CHECK-NEXT:    store %T { i8 44, i32 44 }, %T* %c, !alias.scope !6, !noalias !3
+; CHECK-NEXT:    store %T [[VAL]], %T* %d, !alias.scope !9, !noalias !12
+; CHECK-NEXT:    ret void
+;
+  %val = load %T, %T* %a, !alias.scope !{!10} ; the load whose value store3 writes out
+
+  ; store1 may-aliases the load
+  store %T { i8 23, i32 23 }, %T* %b, !alias.scope !{!11}
+
+  ; store2 may-aliases the load and store3
+  store %T { i8 44, i32 44 }, %T* %c, !alias.scope !{!12}, !noalias !{!11}
+
+  ; store3
+  store %T %val, %T* %d, !alias.scope !{!13}, !noalias !{!10, !11} ; declared noalias against the scopes of the load and store1, but not against store2's scope; expected output keeps the load and all three stores
+  ret void
+}
+
+!0 = !{!0}
+!1 = !{!1}
+!2 = !{!2}
+!3 = !{!3}
+
+!10 = !{ !10, !0 }
+!11 = !{ !11, !1 }
+!12 = !{ !12, !2 }
+!13 = !{ !13, !3 }

Added: llvm/trunk/test/Transforms/MemCpyOpt/loadstore-sret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/loadstore-sret.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/loadstore-sret.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/loadstore-sret.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt -S < %s -basicaa -memcpyopt | FileCheck %s
+; <rdar://problem/8536696>
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%"class.std::auto_ptr" = type { i32* }
+
+; CHECK-LABEL: @_Z3foov(
+define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp { ; fills a temporary through @_Z3barv, then copies its single pointer field into %agg.result
+_ZNSt8auto_ptrIiED1Ev.exit:
+  %temp.lvalue = alloca %"class.std::auto_ptr", align 8 ; temporary passed as the sret argument of @_Z3barv
+; CHECK: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result)
+  call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue) ; expected output passes %agg.result here instead of the temporary
+  %tmp.i.i = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0
+; CHECK-NOT: load
+  %tmp2.i.i = load i32*, i32** %tmp.i.i, align 8 ; reads the pointer field back out of the temporary; expected to disappear
+  %tmp.i.i4 = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0
+; CHECK-NOT: store
+  store i32* %tmp2.i.i, i32** %tmp.i.i4, align 8 ; writes that pointer into %agg.result; expected to disappear
+; CHECK: ret void
+  ret void
+}
+
+declare void @_Z3barv(%"class.std::auto_ptr"* nocapture sret) nounwind

Added: llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt -basicaa -memcpyopt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo([8 x i64]* noalias nocapture sret dereferenceable(64) %sret) { ; zero-fills a local 64-byte array inside lifetime markers and copies it to %sret
+entry-block:
+  %a = alloca [8 x i64], align 8
+  %a.cast = bitcast [8 x i64]* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* %a.cast)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %a.cast, i8 0, i64 64, i1 false) ; all 64 bytes of %a become zero
+  %sret.cast = bitcast [8 x i64]* %sret to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %sret.cast, i8* align 8 %a.cast, i64 64, i1 false) ; expected output: a 64-byte zero memset directly on %sret, with no memcpy left
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* %a.cast)
+  ret void
+
+; CHECK-LABEL: @foo(
+; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %[[sret_cast]], i8 0, i64 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+define void @bar([8 x i64]* noalias nocapture sret dereferenceable(64) %sret, [8 x i64]* noalias nocapture dereferenceable(64) %out) { ; like @foo, plus a partial refill of %a and a second copy to %out
+entry-block:
+  %a = alloca [8 x i64], align 8
+  %a.cast = bitcast [8 x i64]* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* %a.cast)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %a.cast, i8 0, i64 64, i1 false) ; all 64 bytes of %a become zero
+  %sret.cast = bitcast [8 x i64]* %sret to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %sret.cast, i8* align 8 %a.cast, i64 64, i1 false) ; expected output: a 64-byte zero memset on %sret
+  call void @llvm.memset.p0i8.i64(i8* align 8 %a.cast, i8 42, i64 32, i1 false) ; only the first 32 bytes of %a are refilled with 42
+  %out.cast = bitcast [8 x i64]* %out to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %out.cast, i8* align 8 %a.cast, i64 64, i1 false) ; copies all 64 bytes (32 of 42, then 32 of zero); expected output keeps this memcpy
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* %a.cast)
+  ret void
+
+; CHECK-LABEL: @bar(
+; CHECK:         %[[a:[^=]+]] = alloca [8 x i64]
+; CHECK:         %[[a_cast:[^=]+]] = bitcast [8 x i64]* %[[a]] to i8*
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %[[a_cast]], i8 0, i64 64
+; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %[[sret_cast]], i8 0, i64 64
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %[[a_cast]], i8 42, i64 32
+; CHECK:         %[[out_cast:[^=]+]] = bitcast [8 x i64]* %out to i8*
+; CHECK:         call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %[[out_cast]], i8* nonnull align 8 %[[a_cast]], i64 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind

Added: llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,89 @@
+; RUN: opt -memcpyopt -S < %s | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+@undef = internal constant i32 undef, align 4 ; source constant for @test_undef: a single undef i32
+define void @test_undef() nounwind { ; copying from an all-undef constant is expected to become a memset
+  %a = alloca i32, align 4
+  %i8 = bitcast i32* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (i32* @undef to i8*), i64 4, i1 false) ; copies all 4 bytes of @undef into %a
+  ret void
+; CHECK-LABEL: @test_undef(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+@i32x3 = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4 ; source constant for @test_i32x3: three i32 -1 elements
+define void @test_i32x3() nounwind { ; copying from a constant whose elements are all -1 is expected to become a memset
+  %a = alloca [3 x i32], align 4
+  %i8 = bitcast [3 x i32]* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3 to i8*), i64 12, i1 false) ; copies all 12 bytes of @i32x3 into %a
+  ret void
+; CHECK-LABEL: @test_i32x3(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+@i32x3_undef = internal constant [3 x i32] [i32 -1, i32 undef, i32 -1], align 4 ; source constant for @test_i32x3_undef: -1, undef, -1
+define void @test_i32x3_undef() nounwind { ; an undef element between two -1 elements is still expected to allow a memset
+  %a = alloca [3 x i32], align 4
+  %i8 = bitcast [3 x i32]* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3_undef to i8*), i64 12, i1 false) ; copies all 12 bytes of @i32x3_undef into %a
+  ret void
+; CHECK-LABEL: @test_i32x3_undef(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+%struct.bitfield = type { i8, [3 x i8] }
+@bitfield = private unnamed_addr constant %struct.bitfield { i8 -86, [3 x i8] [i8 -86, i8 -86, i8 -86] }, align 4 ; all four bytes are -86 (0xAA)
+define void @test_bitfield() nounwind { ; copying from a struct constant whose bytes are all equal is expected to become a memset
+  %a = alloca %struct.bitfield, align 4
+  %i8 = bitcast %struct.bitfield* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (%struct.bitfield* @bitfield to i8*), i64 4, i1 false) ; copies all 4 bytes of @bitfield into %a
+  ret void
+; CHECK-LABEL: @test_bitfield(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+@i1x16_zero = internal constant <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, align 4 ; source constant for @test_i1x16_zero: sixteen zero i1 lanes
+define void @test_i1x16_zero() nounwind { ; copying from an all-zero i1 vector constant is expected to become a memset
+  %a = alloca <16 x i1>, align 4
+  %i8 = bitcast <16 x i1>* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_zero to i8*), i64 16, i1 false) ; 16-byte copy with @i1x16_zero as the source
+  ret void
+; CHECK-LABEL: @test_i1x16_zero(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+; i1 isn't currently handled. Should it be? The expected output below still contains the memcpy and no memset.
+@i1x16_one = internal constant <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, align 4 ; source constant for @test_i1x16_one: sixteen i1 lanes set to 1
+define void @test_i1x16_one() nounwind {
+  %a = alloca <16 x i1>, align 4
+  %i8 = bitcast <16 x i1>* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_one to i8*), i64 16, i1 false) ; 16-byte copy with @i1x16_one as the source; expected to stay a memcpy
+  ret void
+; CHECK-LABEL: @test_i1x16_one(
+; CHECK-NOT:   call void @llvm.memset
+; CHECK:      call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+@half = internal constant half 0xH0000, align 4 ; source constant for @test_half: a half whose bit pattern is all zero
+define void @test_half() nounwind { ; copying from a zero half constant is expected to become a memset
+  %a = alloca half, align 4
+  %i8 = bitcast half* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (half* @half to i8*), i64 2, i1 false) ; copies both bytes of @half into %a
+  ret void
+; CHECK-LABEL: @test_half(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/memcpy-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memcpy-undef.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memcpy-undef.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memcpy-undef.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.foo = type { i8, [7 x i8], i32 }
+
+define i32 @test1(%struct.foo* nocapture %foobie) nounwind noinline ssp uwtable {
+  %bletch.sroa.1 = alloca [7 x i8], align 1
+  %1 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 0
+  store i8 98, i8* %1, align 4
+  %2 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 1, i64 0
+  %3 = getelementptr inbounds [7 x i8], [7 x i8]* %bletch.sroa.1, i64 0, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 7, i1 false)
+  %4 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 2
+  store i32 20, i32* %4, align 4
+  ret i32 undef
+
+; Check that the memcpy is removed.
+; CHECK-LABEL: @test1(
+; CHECK-NOT: call void @llvm.memcpy
+}
+
+define void @test2(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable {
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %in)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false)
+  ret void
+
+; Check that the memcpy is removed.
+; CHECK-LABEL: @test2(
+; CHECK-NOT: call void @llvm.memcpy
+}
+
+define void @test3(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable {
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %in)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false)
+  ret void
+
+; Check that the memcpy is not removed.
+; CHECK-LABEL: @test3(
+; CHECK: call void @llvm.memcpy
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind

Added: llvm/trunk/test/Transforms/MemCpyOpt/memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memcpy.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memcpy.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memcpy.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,253 @@
+; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck -enable-var-scope %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9"
+
+%0 = type { x86_fp80, x86_fp80 }
+%1 = type { i32, i32 }
+
+define void @test1(%0* sret  %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind  {
+entry:
+  %tmp2 = alloca %0
+  %memtmp = alloca %0, align 16
+  %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1
+  call void @ccoshl(%0* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0) nounwind
+  %tmp219 = bitcast %0* %tmp2 to i8*
+  %memtmp20 = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %tmp219, i8* align 16 %memtmp20, i32 32, i1 false)
+  %agg.result21 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result21, i8* align 16 %tmp219, i32 32, i1 false)
+  ret void
+
+; Check that one of the memcpy's is removed.
+;; FIXME: PR 8643 We should be able to eliminate the last memcpy here.
+
+; CHECK-LABEL: @test1(
+; CHECK: call void @ccoshl
+; CHECK: call void @llvm.memcpy
+; CHECK-NOT: llvm.memcpy
+; CHECK: ret void
+}
+
+declare void @ccoshl(%0* nocapture sret, x86_fp80, x86_fp80) nounwind
+
+
+; The intermediate alloca and one of the memcpy's should be eliminated, the
+; other should be replaced with a memmove.
+define void @test2(i8* %P, i8* %Q) nounwind  {
+  %memtmp = alloca %0, align 16
+  %R = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false)
+  ret void
+
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* align 16 %Q, i8* align 16 %P
+; CHECK-NEXT: ret void
+}
+
+; The intermediate alloca and one of the memcpy's should be eliminated, the
+; other should be replaced with a memcpy.
+define void @test2_memcpy(i8* noalias %P, i8* noalias %Q) nounwind  {
+  %memtmp = alloca %0, align 16
+  %R = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false)
+  ret void
+
+; CHECK-LABEL: @test2_memcpy(
+; CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align 16 %Q, i8* align 16 %P
+; CHECK-NEXT: ret void
+}
+
+
+
+
+@x = external global %0
+
+define void @test3(%0* noalias sret %agg.result) nounwind  {
+  %x.0 = alloca %0
+  %x.01 = bitcast %0* %x.0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %x.01, i8* align 16 bitcast (%0* @x to i8*), i32 32, i1 false)
+  %agg.result2 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result2, i8* align 16 %x.01, i32 32, i1 false)
+  ret void
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: %agg.result1 = bitcast
+; CHECK-NEXT: call void @llvm.memcpy
+; CHECK-NEXT: ret void
+}
+
+
+; PR8644
+define void @test4(i8 *%P) {
+  %A = alloca %1
+  %a = bitcast %1* %A to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 %P, i64 8, i1 false)
+  call void @test4a(i8* align 1 byval %a)
+  ret void
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: call void @test4a(
+}
+
+; Make sure we don't remove the memcpy if the source address space doesn't match the byval argument
+define void @test4_addrspace(i8 addrspace(1)* %P) {
+  %A = alloca %1
+  %a = bitcast %1* %A to i8*
+  call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 %a, i8 addrspace(1)* align 4 %P, i64 8, i1 false)
+  call void @test4a(i8* align 1 byval %a)
+  ret void
+; CHECK-LABEL: @test4_addrspace(
+; CHECK: call void @llvm.memcpy.p0i8.p1i8.i64(
+; CHECK-NEXT: call void @test4a(
+}
+
+declare void @test4a(i8* align 1 byval)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i64(i8* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind
+
+%struct.S = type { i128, [4 x i8]}
+
+@sS = external global %struct.S, align 16
+
+declare void @test5a(%struct.S* align 16 byval) nounwind ssp
+
+
+; rdar://8713376 - This memcpy can't be eliminated.
+define i32 @test5(i32 %x) nounwind ssp {
+entry:
+  %y = alloca %struct.S, align 16
+  %tmp = bitcast %struct.S* %y to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %tmp, i8* align 16 bitcast (%struct.S* @sS to i8*), i64 32, i1 false)
+  %a = getelementptr %struct.S, %struct.S* %y, i64 0, i32 1, i64 0
+  store i8 4, i8* %a
+  call void @test5a(%struct.S* align 16 byval %y)
+  ret i32 0
+  ; CHECK-LABEL: @test5(
+  ; CHECK: store i8 4
+  ; CHECK: call void @test5a(%struct.S* byval align 16 %y)
+}
+
+;; Noop memcpy should be zapped.
+define void @test6(i8 *%P) {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %P, i8* align 4 %P, i64 8, i1 false)
+  ret void
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret void
+}
+
+
+; PR9794 - Should forward memcpy into byval argument even though the memcpy
+; isn't itself 8 byte aligned.
+%struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define i32 @test7(%struct.p* nocapture align 8 byval %q) nounwind ssp {
+entry:
+  %agg.tmp = alloca %struct.p, align 4
+  %tmp = bitcast %struct.p* %agg.tmp to i8*
+  %tmp1 = bitcast %struct.p* %q to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp, i8* align 4 %tmp1, i64 48, i1 false)
+  %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind
+  ret i32 %call
+; CHECK-LABEL: @test7(
+; CHECK: call i32 @g(%struct.p* byval align 8 %q) [[$NUW:#[0-9]+]]
+}
+
+declare i32 @g(%struct.p* align 8 byval)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+
+; PR11142 - When looking for a memcpy-memcpy dependency, don't get stuck on
+; instructions between the memcpy's that only affect the destination pointer.
+@test8.str = internal constant [7 x i8] c"ABCDEF\00"
+
+define void @test8() {
+; CHECK: test8
+; CHECK-NOT: memcpy
+  %A = tail call i8* @malloc(i32 10)
+  %B = getelementptr inbounds i8, i8* %A, i64 2
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @test8.str, i64 0, i64 0), i32 7, i1 false)
+  %C = tail call i8* @malloc(i32 10)
+  %D = getelementptr inbounds i8, i8* %C, i64 2
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i1 false)
+  ret void
+; CHECK: ret void
+}
+
+declare noalias i8* @malloc(i32)
+
+; rdar://11341081
+%struct.big = type { [50 x i32] }
+
+define void @test9_addrspacecast() nounwind ssp uwtable {
+entry:
+; CHECK-LABEL: @test9_addrspacecast(
+; CHECK: f1
+; CHECK-NOT: memcpy
+; CHECK: f2
+  %b = alloca %struct.big, align 4
+  %tmp = alloca %struct.big, align 4
+  call void @f1(%struct.big* sret %tmp)
+  %0 = addrspacecast %struct.big* %b to i8 addrspace(1)*
+  %1 = addrspacecast %struct.big* %tmp to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %0, i8 addrspace(1)* align 4 %1, i64 200, i1 false)
+  call void @f2(%struct.big* %b)
+  ret void
+}
+
+define void @test9() nounwind ssp uwtable {
+entry:
+; CHECK: test9
+; CHECK: f1
+; CHECK-NOT: memcpy
+; CHECK: f2
+  %b = alloca %struct.big, align 4
+  %tmp = alloca %struct.big, align 4
+  call void @f1(%struct.big* sret %tmp)
+  %0 = bitcast %struct.big* %b to i8*
+  %1 = bitcast %struct.big* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 200, i1 false)
+  call void @f2(%struct.big* %b)
+  ret void
+}
+
+; rdar://14073661.
+; Test10 triggered an assertion when the compiler tried to get the size of the
+; opaque type of *x, where x is the formal argument with attribute 'sret'.
+
+%opaque = type opaque
+declare void @foo(i32* noalias nocapture)
+
+define void @test10(%opaque* noalias nocapture sret %x, i32 %y) {
+  %a = alloca i32, align 4
+  store i32 %y, i32* %a
+  call void @foo(i32* noalias nocapture %a)
+  %c = load i32, i32* %a
+  %d = bitcast %opaque* %x to i32*
+  store i32 %c, i32* %d
+  ret void
+}
+
+; don't create new addressspacecasts when we don't know they're safe for the target
+define void @test11([20 x i32] addrspace(1)* nocapture dereferenceable(80) %P) {
+  %A = alloca [20 x i32], align 4
+  %a = bitcast [20 x i32]* %A to i8*
+  %b = bitcast [20 x i32] addrspace(1)* %P to i8 addrspace(1)*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %a, i8 0, i64 80, i1 false)
+  call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* align 4 %b, i8* align 4 %a, i64 80, i1 false)
+  ret void
+; CHECK-LABEL: @test11(
+; CHECK-NOT: addrspacecast
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocapture, i64, i1) nounwind
+
+declare void @f1(%struct.big* nocapture sret)
+declare void @f2(%struct.big*)
+
+; CHECK: attributes [[$NUW]] = { nounwind }
+; CHECK: attributes #1 = { argmemonly nounwind }
+; CHECK: attributes #2 = { nounwind ssp }
+; CHECK: attributes #3 = { nounwind ssp uwtable }

Added: llvm/trunk/test/Transforms/MemCpyOpt/memmove.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memmove.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memmove.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memmove.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s
+; These memmoves should get optimized to memcpys.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin9.0"
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define i8* @test1(i8* nocapture %src) nounwind {
+entry:
+; CHECK-LABEL: @test1(
+; CHECK: call void @llvm.memcpy
+
+  %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32))
+  %call3 = bitcast i8* %malloccall to [13 x i8]*
+  %call3.sub = getelementptr inbounds [13 x i8], [13 x i8]* %call3, i64 0, i64 0
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i1 false)
+  ret i8* %call3.sub
+}
+declare noalias i8* @malloc(i32)
+
+
+define void @test2(i8* %P) nounwind {
+entry:
+; CHECK-LABEL: @test2(
+; CHECK: call void @llvm.memcpy
+  %add.ptr = getelementptr i8, i8* %P, i64 16
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i1 false)
+  ret void
+}
+
+; This cannot be optimized because the src/dst really do overlap.
+define void @test3(i8* %P) nounwind {
+entry:
+; CHECK-LABEL: @test3(
+; CHECK: call void @llvm.memmove
+  %add.ptr = getelementptr i8, i8* %P, i64 16
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i1 false)
+  ret void
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -memcpyopt -S %s | FileCheck %s
+
+; memset -> memcpy forwarding, if memcpy is larger than memset, but trailing
+; bytes are known to be undef.
+
+
+%T = type { i64, i32, i32 }
+
+define void @test_alloca(i8* %result) {
+; CHECK-LABEL: @test_alloca(
+; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca %T, align 8
+  %b = bitcast %T* %a to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+  ret void
+}
+
+define void @test_alloca_with_lifetimes(i8* %result) {
+; CHECK-LABEL: @test_alloca_with_lifetimes(
+; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
+; CHECK-NEXT:    ret void
+;
+  %a = alloca %T, align 8
+  %b = bitcast %T* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %b)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* %b)
+  ret void
+}
+
+define void @test_malloc_with_lifetimes(i8* %result) {
+; CHECK-LABEL: @test_malloc_with_lifetimes(
+; CHECK-NEXT:    [[A:%.*]] = call i8* @malloc(i64 16)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
+; CHECK-NEXT:    call void @free(i8* [[A]])
+; CHECK-NEXT:    ret void
+;
+  %a = call i8* @malloc(i64 16)
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %a)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* %a)
+  call void @free(i8* %a)
+  ret void
+}
+
+; memcpy size is larger than lifetime, don't optimize.
+define void @test_copy_larger_than_lifetime_size(i8* %result) {
+; CHECK-LABEL: @test_copy_larger_than_lifetime_size(
+; CHECK-NEXT:    [[A:%.*]] = call i8* @malloc(i64 16)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A]])
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 12, i8* [[A]])
+; CHECK-NEXT:    call void @free(i8* [[A]])
+; CHECK-NEXT:    ret void
+;
+  %a = call i8* @malloc(i64 16)
+  call void @llvm.lifetime.start.p0i8(i64 12, i8* %a)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
+  call void @llvm.lifetime.end.p0i8(i64 12, i8* %a)
+  call void @free(i8* %a)
+  ret void
+}
+
+; The trailing bytes are not known to be undef, we can't ignore them.
+define void @test_not_undef_memory(i8* %result, i8* %input) {
+; CHECK-LABEL: @test_not_undef_memory(
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[INPUT:%.*]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[INPUT]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memset.p0i8.i64(i8* align 8 %input, i8 0, i64 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %input, i64 16, i1 false)
+  ret void
+}
+
+; Memset is volatile, memcpy is not. Can be optimized.
+define void @test_volatile_memset(i8* %result) {
+; CHECK-LABEL: @test_volatile_memset(
+; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca %T, align 8
+  %b = bitcast %T* %a to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+  ret void
+}
+
+; Memcpy is volatile, memset is not. Cannot be optimized.
+define void @test_volatile_memcpy(i8* %result) {
+; CHECK-LABEL: @test_volatile_memcpy(
+; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 true)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca %T, align 8
+  %b = bitcast %T* %a to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 true)
+  ret void
+}
+
+; Write between memset and memcpy, can't optimize.
+define void @test_write_between(i8* %result) {
+; CHECK-LABEL: @test_write_between(
+; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
+; CHECK-NEXT:    store i8 -1, i8* [[B]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca %T, align 8
+  %b = bitcast %T* %a to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
+  store i8 -1, i8* %b
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+  ret void
+}
+
+; A write prior to the memset, which is part of the memset region.
+; We could optimize this, but currently don't, because the used memory location is imprecise.
+define void @test_write_before_memset_in_memset_region(i8* %result) {
+; CHECK-LABEL: @test_write_before_memset_in_memset_region(
+; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT:    store i8 -1, i8* [[B]]
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca %T, align 8
+  %b = bitcast %T* %a to i8*
+  store i8 -1, i8* %b
+  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+  ret void
+}
+
+; A write prior to the memset, which is part of the memcpy (but not memset) region.
+; This cannot be optimized.
+define void @test_write_before_memset_in_memcpy_region(i8* %result) {
+; CHECK-LABEL: @test_write_before_memset_in_memcpy_region(
+; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 2
+; CHECK-NEXT:    store i32 -1, i32* [[C]]
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca %T, align 8
+  %b = bitcast %T* %a to i8*
+  %c = getelementptr inbounds %T, %T* %a, i64 0, i32 2
+  store i32 -1, i32* %c
+  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 8, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+  ret void
+}
+
+; A write prior to the memset, which is part of both the memset and memcpy regions.
+; This cannot be optimized.
+define void @test_write_before_memset_in_both_regions(i8* %result) {
+; CHECK-LABEL: @test_write_before_memset_in_both_regions(
+; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 1
+; CHECK-NEXT:    store i32 -1, i32* [[C]]
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 10, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %a = alloca %T, align 8
+  %b = bitcast %T* %a to i8*
+  %c = getelementptr inbounds %T, %T* %a, i64 0, i32 1
+  store i32 -1, i32* %c
+  call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 10, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+  ret void
+}
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)

Added: llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,168 @@
+; RUN: opt -basicaa -memcpyopt -S %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: define void @test
+; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size
+; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size
+; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
+; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false)
+; CHECK-NEXT: ret void
+define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_different_types_i32_i64
+; CHECK: [[DSTSIZE:%[0-9]+]] = zext i32 %dst_size to i64
+; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 [[DSTSIZE]], %src_size
+; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 [[DSTSIZE]], %src_size
+; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
+; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false)
+; CHECK-NEXT: ret void
+define void @test_different_types_i32_i64(i8* %dst, i8* %src, i32 %dst_size, i64 %src_size, i8 %c) {
+  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_different_types_i128_i32
+; CHECK: [[SRCSIZE:%[0-9]+]] = zext i32 %src_size to i128
+; CHECK: [[ULE:%[0-9]+]] = icmp ule i128 %dst_size, [[SRCSIZE]]
+; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i128 %dst_size, [[SRCSIZE]]
+; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]]
+; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i128 [[SRCSIZE]]
+; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false)
+; CHECK-NEXT: ret void
+define void @test_different_types_i128_i32(i8* %dst, i8* %src, i128 %dst_size, i32 %src_size, i8 %c) {
+  call void @llvm.memset.p0i8.i128(i8* %dst, i8 %c, i128 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_different_types_i32_i128
+; CHECK: [[DSTSIZE:%[0-9]+]] = zext i32 %dst_size to i128
+; CHECK: [[ULE:%[0-9]+]] = icmp ule i128 [[DSTSIZE]], %src_size
+; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i128 [[DSTSIZE]], %src_size
+; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]]
+; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i128 %src_size
+; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i1 false)
+; CHECK-NEXT: ret void
+define void @test_different_types_i32_i128(i8* %dst, i8* %src, i32 %dst_size, i128 %src_size, i8 %c) {
+  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_different_types_i64_i32
+; CHECK: [[SRCSIZE:%[0-9]+]] = zext i32 %src_size to i64
+; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, [[SRCSIZE]]
+; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, [[SRCSIZE]]
+; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
+; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 [[SRCSIZE]]
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false)
+; CHECK-NEXT: ret void
+define void @test_different_types_i64_i32(i8* %dst, i8* %src, i64 %dst_size, i32 %src_size, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_align_same
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false)
+define void @test_align_same(i8* %src, i8* %dst, i64 %dst_size) {
+  call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_align_min
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 {{.*}}, i1 false)
+define void @test_align_min(i8* %src, i8* %dst, i64 %dst_size) {
+  call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_align_memcpy
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false)
+define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) {
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 80, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_non_i8_dst_type
+; CHECK-NEXT: %dst = bitcast i64* %dst_pi64 to i8*
+; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size
+; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size
+; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]]
+; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false)
+; CHECK-NEXT: ret void
+define void @test_non_i8_dst_type(i8* %src, i64 %src_size, i64* %dst_pi64, i64 %dst_size, i8 %c) {
+  %dst = bitcast i64* %dst_pi64 to i8*
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_different_dst
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i1 false)
+; CHECK-NEXT: ret void
+define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) {
+  call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i1 false)
+  ret void
+}
+
+; Make sure we also take into account dependencies on the destination.
+
+; CHECK-LABEL: define i8 @test_intermediate_read
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i1 false)
+; CHECK-NEXT: %r = load i8, i8* %a
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i1 false)
+; CHECK-NEXT: ret i8 %r
+define i8 @test_intermediate_read(i8* %a, i8* %b) #0 {
+  call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i1 false)
+  %r = load i8, i8* %a
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i1 false)
+  ret i8 %r
+}
+
+%struct = type { [8 x i8], [8 x i8] }
+
+; CHECK-LABEL: define void @test_intermediate_write
+; CHECK-NEXT: %a = alloca %struct
+; CHECK-NEXT: %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0
+; CHECK-NEXT: %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a0, i8 0, i64 16, i1 false)
+; CHECK-NEXT: store i8 1, i8* %a1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a0, i8* %b, i64 8, i1 false)
+; CHECK-NEXT: ret void
+define void @test_intermediate_write(i8* %b) #0 {
+  %a = alloca %struct
+  %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0
+  %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0
+  call void @llvm.memset.p0i8.i64(i8* %a0, i8 0, i64 16, i1 false)
+  store i8 1, i8* %a1
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a0, i8* %b, i64 8, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)
+declare void @llvm.memset.p0i8.i128(i8* nocapture, i8, i128, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i128(i8* nocapture, i8* nocapture readonly, i128, i1)

Added: llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,101 @@
+; RUN: opt -memcpyopt -S %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: define void @test(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst2, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: ret void
+define void @test(i8* %dst1, i8* %dst2, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 128, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_smaller_memcpy(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false)
+; CHECK-NEXT: ret void
+define void @test_smaller_memcpy(i8* %dst1, i8* %dst2, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_smaller_memset(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+; CHECK-NEXT: ret void
+define void @test_smaller_memset(i8* %dst1, i8* %dst2, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_align_memset(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: ret void
+define void @test_align_memset(i8* %dst1, i8* %dst2, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_different_types(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i1 false)
+; CHECK-NEXT: ret void
+define void @test_different_types(i8* %dst1, i8* %dst2, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_different_types_2(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 %dst1, i8 %c, i32 128, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false)
+; CHECK-NEXT: ret void
+define void @test_different_types_2(i8* %dst1, i8* %dst2, i8 %c) {
+  call void @llvm.memset.p0i8.i32(i8* align 8 %dst1, i8 %c, i32 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_different_source_gep(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: %p = getelementptr i8, i8* %dst1, i64 64
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i1 false)
+; CHECK-NEXT: ret void
+define void @test_different_source_gep(i8* %dst1, i8* %dst2, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  ; FIXME: We could optimize this as well.
+  %p = getelementptr i8, i8* %dst1, i64 64
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_variable_size_1(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+; CHECK-NEXT: ret void
+define void @test_variable_size_1(i8* %dst1, i64 %dst1_size, i8* %dst2, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: define void @test_variable_size_2(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false)
+; CHECK-NEXT: ret void
+define void @test_variable_size_2(i8* %dst1, i8* %dst2, i64 %dst2_size, i8 %c) {
+  call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1)

Added: llvm/trunk/test/Transforms/MemCpyOpt/nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/nontemporal.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/nontemporal.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/nontemporal.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that we don't combine nontemporal stores into memset calls.
+
+define void @nontemporal_stores_1(<4 x float>* nocapture %dst) {
+; CHECK-LABEL: @nontemporal_stores_1
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr2, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr3, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr4, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr5, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr6, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr7, align 16, !nontemporal !0
+; CHECK-NEXT: ret void
+entry:
+  store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+  %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1
+  store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+  %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 2
+  store <4 x float> zeroinitializer, <4 x float>* %ptr2, align 16, !nontemporal !0
+  %ptr3 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 3
+  store <4 x float> zeroinitializer, <4 x float>* %ptr3, align 16, !nontemporal !0
+  %ptr4 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 4
+  store <4 x float> zeroinitializer, <4 x float>* %ptr4, align 16, !nontemporal !0
+  %ptr5 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 5
+  store <4 x float> zeroinitializer, <4 x float>* %ptr5, align 16, !nontemporal !0
+  %ptr6 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 6
+  store <4 x float> zeroinitializer, <4 x float>* %ptr6, align 16, !nontemporal !0
+  %ptr7 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 7
+  store <4 x float> zeroinitializer, <4 x float>* %ptr7, align 16, !nontemporal !0
+  ret void
+}
+
+define void @nontemporal_stores_2(<4 x float>* nocapture %dst) {
+; CHECK-LABEL: @nontemporal_stores_2
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+; CHECK-NEXT: ret void
+entry:
+  store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+  %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1
+  store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+  ret void
+}
+
+!0 = !{i32 1}

Added: llvm/trunk/test/Transforms/MemCpyOpt/pr29105.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/pr29105.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/pr29105.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/pr29105.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt -memcpyopt -instcombine -S %s | FileCheck %s
+%Foo = type { [2048 x i64] }
+
+; Make sure that all memcpy calls are converted to memset calls, or removed.
+; CHECK-LABEL: @baz(
+; CHECK-NOT: call void @llvm.memcpy
+define void @baz() unnamed_addr #0 {
+entry-block:
+  %x.sroa.0 = alloca [2048 x i64], align 8
+  %tmp0 = alloca [2048 x i64], align 8
+  %0 = bitcast [2048 x i64]* %tmp0 to i8*
+  %tmp2 = alloca %Foo, align 8
+  %x.sroa.0.0..sroa_cast6 = bitcast [2048 x i64]* %x.sroa.0 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16384, i8* %x.sroa.0.0..sroa_cast6)
+  call void @llvm.lifetime.start.p0i8(i64 16384, i8* %0)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 16384, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %x.sroa.0.0..sroa_cast6, i8* align 8 %0, i64 16384, i1 false)
+  call void @llvm.lifetime.end.p0i8(i64 16384, i8* %0)
+  %1 = bitcast %Foo* %tmp2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16384, i8* %1)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %1, i8* align 8 %x.sroa.0.0..sroa_cast6, i64 16384, i1 false)
+  call void @bar(%Foo* noalias nocapture nonnull dereferenceable(16384) %tmp2)
+  call void @llvm.lifetime.end.p0i8(i64 16384, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 16384, i8* %x.sroa.0.0..sroa_cast6)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+declare void @bar(%Foo* noalias nocapture readonly dereferenceable(16384)) unnamed_addr #0
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
+
+attributes #0 = { uwtable }
+attributes #1 = { argmemonly nounwind }

Added: llvm/trunk/test/Transforms/MemCpyOpt/process_store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/process_store.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/process_store.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/process_store.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt < %s -memcpyopt -disable-output
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common dso_local local_unnamed_addr global i32 0, align 4
+@a = common dso_local local_unnamed_addr global i32 0, align 4
+
+declare dso_local i32 @f1()
+
+; Do not crash when the store is the first instruction in its basic block.
+define dso_local void @f2() {
+for.end:                                          ; entry block; returns without branching to for.body
+  %0 = load i32, i32* @b, align 4
+  ret void
+
+for.body:                                         ; only predecessor is for.body itself
+  store i32 %1, i32* @a, align 4                  ; first instruction in the block; %1 is defined further down
+  %call = call i32 @f1()
+  %cmp = icmp sge i32 %call, 0
+  %1 = load i32, i32* @b, align 4
+  br label %for.body
+}
+
+; Do not crash when the call comes after the store, not before it, in the block.
+define dso_local void @f3() {
+for.end:                                          ; entry block; returns without branching to for.body
+  %0 = load i32, i32* @b, align 4
+  ret void
+
+for.body:                                         ; only predecessor is for.body itself
+  %t = add i32 %t2, 1                             ; %t and %t2 refer to each other
+  store i32 %1, i32* @a, align 4                  ; preceded by %t, so not first in the block; %1 is defined further down
+  %call = call i32 @f1()                          ; the call follows the store
+  %cmp = icmp sge i32 %call, 0
+  %1 = load i32, i32* @b, align 4
+  %t2 = xor i32 %t, 5
+  br label %for.body
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/profitable-memset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/profitable-memset.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/profitable-memset.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/profitable-memset.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @foo(
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %2, i8 0, i64 8, i1 false)
+
+define void @foo(i64* nocapture %P) {
+entry:
+  %0 = bitcast i64* %P to i16*
+  %arrayidx = getelementptr inbounds i16, i16* %0, i64 1        ; byte offset 2 from %P
+  %1 = bitcast i16* %arrayidx to i32*
+  %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 3       ; byte offset 6 from %P
+  store i16 0, i16* %0, align 2                                 ; zeroes bytes 0-1
+  store i32 0, i32* %1, align 4                                 ; zeroes bytes 2-5
+  store i16 0, i16* %arrayidx1, align 2                         ; zeroes bytes 6-7; the three stores together cover 8 contiguous bytes
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/MemCpyOpt/smaller.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/smaller.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/smaller.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/smaller.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt -memcpyopt -S < %s | FileCheck %s
+; RUN: opt -passes=memcpyopt -S < %s | FileCheck %s
+; rdar://8875553
+
+; Memcpyopt shouldn't optimize the second memcpy using the first
+; because the first has a smaller size.
+
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp, i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false)
+
+target datalayout = "e-p:32:32:32"
+
+%struct.s = type { [11 x i8], i32 }
+
+@.str = private constant [11 x i8] c"0123456789\00"
+@cell = external global %struct.s
+
+declare void @check(%struct.s* byval %p) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+
+define void @foo() nounwind {
+entry:
+  %agg.tmp = alloca %struct.s, align 4
+  store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i8* align 1 getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 11, i1 false)
+  %tmp = getelementptr inbounds %struct.s, %struct.s* %agg.tmp, i32 0, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp, i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false)
+  call void @check(%struct.s* byval %agg.tmp)
+  ret void
+}

Added: llvm/trunk/test/Transforms/MemCpyOpt/sret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/sret.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/sret.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/sret.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy"
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin9"
+
+%0 = type { x86_fp80, x86_fp80 }
+
+define void @ccosl(%0* noalias sret %agg.result, %0* byval align 8 %z) nounwind {
+entry:
+  %iz = alloca %0
+  %memtmp = alloca %0, align 16
+  %tmp1 = getelementptr %0, %0* %z, i32 0, i32 1
+  %tmp2 = load x86_fp80, x86_fp80* %tmp1, align 16
+  %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2
+  %tmp4 = getelementptr %0, %0* %iz, i32 0, i32 1
+  %real = getelementptr %0, %0* %iz, i32 0, i32 0
+  %tmp7 = getelementptr %0, %0* %z, i32 0, i32 0
+  %tmp8 = load x86_fp80, x86_fp80* %tmp7, align 16
+  store x86_fp80 %tmp3, x86_fp80* %real, align 16
+  store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
+  call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind
+  %memtmp14 = bitcast %0* %memtmp to i8*
+  %agg.result15 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result15, i8* align 16 %memtmp14, i32 32, i1 false)
+  ret void
+}
+
+declare void @ccoshl(%0* noalias nocapture sret, %0* byval) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind

Added: llvm/trunk/test/Transforms/MemCpyOpt/stackrestore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/stackrestore.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/stackrestore.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/stackrestore.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,74 @@
+; RUN: opt -S -memcpyopt < %s | FileCheck %s
+
+; PR40118: BasicAA didn't realize that stackrestore ends the lifetime of
+; unescaped dynamic allocas, such as those that might come from inalloca.
+
+source_filename = "t.cpp"
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-unknown-windows-msvc19.14.26433"
+
+@str = internal constant [9 x i8] c"abcdxxxxx"
+
+
+; Test that we can propagate memcpy through an unescaped dynamic alloca across
+; a call to @external.
+
+define i32 @test_norestore(i32 %n) {
+  %tmpmem = alloca [10 x i8], align 4
+  %tmp = getelementptr inbounds [10 x i8], [10 x i8]* %tmpmem, i32 0, i32 0
+
+  ; Make a dynamic alloca, initialize it.
+  %p = alloca i8, i32 %n, align 4
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
+
+  ; This extra byte exists to prevent memcpyopt from propagating @str.
+  %p10 = getelementptr inbounds i8, i8* %p, i32 9
+  store i8 0, i8* %p10
+
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false)
+  call void @external()
+  %heap = call i8* @malloc(i32 9)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %tmp, i32 9, i1 false)
+  call void @useit(i8* %heap)
+  ret i32 0
+}
+
+; CHECK-LABEL: define i32 @test_norestore(i32 %n)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %p, i32 9, i1 false)
+
+
+; Do not propagate memcpy from %p across the stackrestore.
+
+define i32 @test_stackrestore() {
+  %tmpmem = alloca [10 x i8], align 4
+  %tmp = getelementptr inbounds [10 x i8], [10 x i8]* %tmpmem, i32 0, i32 0
+  %inalloca.save = tail call i8* @llvm.stacksave()
+  %argmem = alloca inalloca [10 x i8], align 4
+  %p = getelementptr inbounds [10 x i8], [10 x i8]* %argmem, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
+
+  ; This extra byte exists to prevent memcpyopt from propagating @str.
+  %p10 = getelementptr inbounds [10 x i8], [10 x i8]* %argmem, i32 0, i32 9
+  store i8 0, i8* %p10
+
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false)
+  call void @llvm.stackrestore(i8* %inalloca.save)
+  %heap = call i8* @malloc(i32 9)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %tmp, i32 9, i1 false)
+  call void @useit(i8* %heap)
+  ret i32 0
+}
+
+; CHECK-LABEL: define i32 @test_stackrestore()
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %tmp, i32 9, i1 false)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+declare i8* @malloc(i32)
+declare void @useit(i8*)
+declare void @external()

Added: llvm/trunk/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,276 @@
+; RUN: opt -mergefunc -disable-output < %s
+; This used to crash.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+%"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
+%"struct.kc::impl_CexpressionDQ" = type { %"struct.kc::impl_Ccode_option", %"struct.kc::impl_Ccode_option"*, %"struct.kc::impl_CexpressionDQ"* }
+%"struct.kc::impl_Ctext" = type { %"struct.kc::impl_Ccode_option", i32, %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_Ctext_elem"*, %"struct.kc::impl_Ctext"* }
+%"struct.kc::impl_Ctext_elem" = type { %"struct.kc::impl_abstract_phylum", i32, %"struct.kc::impl_casestring__Str"* }
+%"struct.kc::impl_ID" = type { %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_Ccode_option"*, %"struct.kc::impl_casestring__Str"*, i32, %"struct.kc::impl_casestring__Str"* }
+%"struct.kc::impl_abstract_phylum" = type { i32 (...)** }
+%"struct.kc::impl_ac_abstract_declarator_AcAbsdeclDirdecl" = type { %"struct.kc::impl_Ccode_option", %"struct.kc::impl_Ccode_option"*, %"struct.kc::impl_Ccode_option"* }
+%"struct.kc::impl_casestring__Str" = type { %"struct.kc::impl_abstract_phylum", i8* }
+%"struct.kc::impl_elem_patternrepresentation" = type { %"struct.kc::impl_abstract_phylum", i32, %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_ID"* }
+%"struct.kc::impl_fileline" = type { %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_casestring__Str"*, i32 }
+%"struct.kc::impl_fileline_FileLine" = type { %"struct.kc::impl_fileline" }
+%"struct.kc::impl_outmostpatterns" = type { %"struct.kc::impl_Ccode_option", %"struct.kc::impl_elem_patternrepresentation"*, %"struct.kc::impl_outmostpatterns"* }
+%"struct.kc::impl_withcaseinfo_Withcaseinfo" = type { %"struct.kc::impl_Ccode_option", %"struct.kc::impl_outmostpatterns"*, %"struct.kc::impl_outmostpatterns"*, %"struct.kc::impl_Ctext"* }
+
+@_ZTVN2kc13impl_filelineE = external constant [13 x i32 (...)*], align 32
+@.str = external constant [1 x i8], align 1
+@_ZTVN2kc22impl_fileline_FileLineE = external constant [13 x i32 (...)*], align 32
+
+define void @_ZN2kc22impl_fileline_FileLineC2EPNS_20impl_casestring__StrEi(%"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_casestring__Str"* %_file, i32 %_line) align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  %this_addr = alloca %"struct.kc::impl_fileline_FileLine"*, align 4
+  %_file_addr = alloca %"struct.kc::impl_casestring__Str"*, align 4
+  %_line_addr = alloca i32, align 4
+  %save_filt.150 = alloca i32
+  %save_eptr.149 = alloca i8*
+  %iftmp.99 = alloca %"struct.kc::impl_casestring__Str"*
+  %eh_exception = alloca i8*
+  %eh_selector = alloca i32
+  %"alloca point" = bitcast i32 0 to i32
+  store %"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_fileline_FileLine"** %this_addr
+  store %"struct.kc::impl_casestring__Str"* %_file, %"struct.kc::impl_casestring__Str"** %_file_addr
+  store i32 %_line, i32* %_line_addr
+  %0 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %1 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %0, i32 0, i32 0
+  call void @_ZN2kc13impl_filelineC2Ev() nounwind
+  %2 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %3 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %2, i32 0, i32 0
+  %4 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %3, i32 0, i32 0
+  %5 = getelementptr inbounds %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_abstract_phylum"* %4, i32 0, i32 0
+  store i32 (...)** getelementptr inbounds ([13 x i32 (...)*], [13 x i32 (...)*]* @_ZTVN2kc22impl_fileline_FileLineE, i32 0, i32 2), i32 (...)*** %5, align 4
+  %6 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  %7 = icmp eq %"struct.kc::impl_casestring__Str"* %6, null
+  br i1 %7, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  %8 = invoke %"struct.kc::impl_casestring__Str"* @_ZN2kc12mkcasestringEPKci()
+          to label %invcont unwind label %lpad
+
+invcont:                                          ; preds = %bb
+  store %"struct.kc::impl_casestring__Str"* %8, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  br label %bb2
+
+bb1:                                              ; preds = %entry
+  %9 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  store %"struct.kc::impl_casestring__Str"* %9, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %invcont
+  %10 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %11 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %10, i32 0, i32 0
+  %12 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %11, i32 0, i32 1
+  %13 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  store %"struct.kc::impl_casestring__Str"* %13, %"struct.kc::impl_casestring__Str"** %12, align 4
+  %14 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %15 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %14, i32 0, i32 0
+  %16 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %15, i32 0, i32 2
+  %17 = load i32, i32* %_line_addr, align 4
+  store i32 %17, i32* %16, align 4
+  ret void
+
+lpad:                                             ; preds = %bb
+  %eh_ptr = landingpad { i8*, i32 }
+              cleanup
+  %exn = extractvalue { i8*, i32 } %eh_ptr, 0
+  store i8* %exn, i8** %eh_exception
+  %eh_ptr4 = load i8*, i8** %eh_exception
+  %eh_select5 = extractvalue { i8*, i32 } %eh_ptr, 1
+  store i32 %eh_select5, i32* %eh_selector
+  %eh_select = load i32, i32* %eh_selector
+  store i32 %eh_select, i32* %save_filt.150, align 4
+  %eh_value = load i8*, i8** %eh_exception
+  store i8* %eh_value, i8** %save_eptr.149, align 4
+  %18 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %19 = bitcast %"struct.kc::impl_fileline_FileLine"* %18 to %"struct.kc::impl_fileline"*
+  call void @_ZN2kc13impl_filelineD2Ev(%"struct.kc::impl_fileline"* %19) nounwind
+  %20 = load i8*, i8** %save_eptr.149, align 4
+  store i8* %20, i8** %eh_exception, align 4
+  %21 = load i32, i32* %save_filt.150, align 4
+  store i32 %21, i32* %eh_selector, align 4
+  %eh_ptr6 = load i8*, i8** %eh_exception
+  call void @_Unwind_Resume_or_Rethrow()
+  unreachable
+}
+
+declare void @_ZN2kc13impl_filelineC2Ev() nounwind align 2
+
+define void @_ZN2kc13impl_filelineD1Ev(%"struct.kc::impl_fileline"* %this) nounwind align 2 {
+entry:
+  %this_addr = alloca %"struct.kc::impl_fileline"*, align 4
+  %"alloca point" = bitcast i32 0 to i32
+  store %"struct.kc::impl_fileline"* %this, %"struct.kc::impl_fileline"** %this_addr
+  %0 = load %"struct.kc::impl_fileline"*, %"struct.kc::impl_fileline"** %this_addr, align 4
+  %1 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %0, i32 0, i32 0
+  %2 = getelementptr inbounds %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_abstract_phylum"* %1, i32 0, i32 0
+  store i32 (...)** getelementptr inbounds ([13 x i32 (...)*], [13 x i32 (...)*]* @_ZTVN2kc13impl_filelineE, i32 0, i32 2), i32 (...)*** %2, align 4
+  %3 = trunc i32 0 to i8
+  %toBool = icmp ne i8 %3, 0
+  br i1 %toBool, label %bb1, label %return
+
+bb1:                                              ; preds = %entry
+  %4 = load %"struct.kc::impl_fileline"*, %"struct.kc::impl_fileline"** %this_addr, align 4
+  %5 = bitcast %"struct.kc::impl_fileline"* %4 to i8*
+  call void @_ZdlPv() nounwind
+  br label %return
+
+return:                                           ; preds = %bb1, %entry
+  ret void
+}
+
+declare void @_ZdlPv() nounwind
+
+define void @_ZN2kc13impl_filelineD2Ev(%"struct.kc::impl_fileline"* %this) nounwind align 2 {
+entry:
+  %this_addr = alloca %"struct.kc::impl_fileline"*, align 4
+  %"alloca point" = bitcast i32 0 to i32
+  store %"struct.kc::impl_fileline"* %this, %"struct.kc::impl_fileline"** %this_addr
+  %0 = load %"struct.kc::impl_fileline"*, %"struct.kc::impl_fileline"** %this_addr, align 4
+  %1 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %0, i32 0, i32 0
+  %2 = getelementptr inbounds %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_abstract_phylum"* %1, i32 0, i32 0
+  store i32 (...)** getelementptr inbounds ([13 x i32 (...)*], [13 x i32 (...)*]* @_ZTVN2kc13impl_filelineE, i32 0, i32 2), i32 (...)*** %2, align 4
+  %3 = trunc i32 0 to i8
+  %toBool = icmp ne i8 %3, 0
+  br i1 %toBool, label %bb1, label %return
+
+bb1:                                              ; preds = %entry
+  %4 = load %"struct.kc::impl_fileline"*, %"struct.kc::impl_fileline"** %this_addr, align 4
+  %5 = bitcast %"struct.kc::impl_fileline"* %4 to i8*
+  call void @_ZdlPv() nounwind
+  br label %return
+
+return:                                           ; preds = %bb1, %entry
+  ret void
+}
+
+define void @_ZN2kc22impl_fileline_FileLineC1EPNS_20impl_casestring__StrEi(%"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_casestring__Str"* %_file, i32 %_line) align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  %this_addr = alloca %"struct.kc::impl_fileline_FileLine"*, align 4
+  %_file_addr = alloca %"struct.kc::impl_casestring__Str"*, align 4
+  %_line_addr = alloca i32, align 4
+  %save_filt.148 = alloca i32
+  %save_eptr.147 = alloca i8*
+  %iftmp.99 = alloca %"struct.kc::impl_casestring__Str"*
+  %eh_exception = alloca i8*
+  %eh_selector = alloca i32
+  %"alloca point" = bitcast i32 0 to i32
+  store %"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_fileline_FileLine"** %this_addr
+  store %"struct.kc::impl_casestring__Str"* %_file, %"struct.kc::impl_casestring__Str"** %_file_addr
+  store i32 %_line, i32* %_line_addr
+  %0 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %1 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %0, i32 0, i32 0
+  call void @_ZN2kc13impl_filelineC2Ev() nounwind
+  %2 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %3 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %2, i32 0, i32 0
+  %4 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %3, i32 0, i32 0
+  %5 = getelementptr inbounds %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_abstract_phylum"* %4, i32 0, i32 0
+  store i32 (...)** getelementptr inbounds ([13 x i32 (...)*], [13 x i32 (...)*]* @_ZTVN2kc22impl_fileline_FileLineE, i32 0, i32 2), i32 (...)*** %5, align 4
+  %6 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  %7 = icmp eq %"struct.kc::impl_casestring__Str"* %6, null
+  br i1 %7, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  %8 = invoke %"struct.kc::impl_casestring__Str"* @_ZN2kc12mkcasestringEPKci()
+          to label %invcont unwind label %lpad
+
+invcont:                                          ; preds = %bb
+  store %"struct.kc::impl_casestring__Str"* %8, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  br label %bb2
+
+bb1:                                              ; preds = %entry
+  %9 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  store %"struct.kc::impl_casestring__Str"* %9, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %invcont
+  %10 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %11 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %10, i32 0, i32 0
+  %12 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %11, i32 0, i32 1
+  %13 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  store %"struct.kc::impl_casestring__Str"* %13, %"struct.kc::impl_casestring__Str"** %12, align 4
+  %14 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %15 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %14, i32 0, i32 0
+  %16 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %15, i32 0, i32 2
+  %17 = load i32, i32* %_line_addr, align 4
+  store i32 %17, i32* %16, align 4
+  ret void
+
+lpad:                                             ; preds = %bb
+  %eh_ptr = landingpad { i8*, i32 }
+              cleanup
+  %exn = extractvalue { i8*, i32 } %eh_ptr, 0
+  store i8* %exn, i8** %eh_exception
+  %eh_ptr4 = load i8*, i8** %eh_exception
+  %eh_select5 = extractvalue { i8*, i32 } %eh_ptr, 1
+  store i32 %eh_select5, i32* %eh_selector
+  %eh_select = load i32, i32* %eh_selector
+  store i32 %eh_select, i32* %save_filt.148, align 4
+  %eh_value = load i8*, i8** %eh_exception
+  store i8* %eh_value, i8** %save_eptr.147, align 4
+  %18 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %19 = bitcast %"struct.kc::impl_fileline_FileLine"* %18 to %"struct.kc::impl_fileline"*
+  call void @_ZN2kc13impl_filelineD2Ev(%"struct.kc::impl_fileline"* %19) nounwind
+  %20 = load i8*, i8** %save_eptr.147, align 4
+  store i8* %20, i8** %eh_exception, align 4
+  %21 = load i32, i32* %save_filt.148, align 4
+  store i32 %21, i32* %eh_selector, align 4
+  %eh_ptr6 = load i8*, i8** %eh_exception
+  call void @_Unwind_Resume_or_Rethrow()
+  unreachable
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_Unwind_Resume_or_Rethrow()
+
+define void @_ZN2kc21printer_functor_classC2Ev(%"struct.kc::impl_abstract_phylum"* %this) nounwind align 2 {
+entry:
+  unreachable
+}
+
+define %"struct.kc::impl_Ccode_option"* @_ZN2kc11phylum_castIPNS_17impl_withcaseinfoES1_EET_PT0_(%"struct.kc::impl_Ccode_option"* %t) nounwind {
+entry:
+  ret %"struct.kc::impl_Ccode_option"* null
+}
+
+define %"struct.kc::impl_abstract_phylum"* @_ZNK2kc43impl_ac_direct_declarator_AcDirectDeclProto9subphylumEi(%"struct.kc::impl_ac_abstract_declarator_AcAbsdeclDirdecl"* %this, i32 %no) nounwind align 2 {
+entry:
+  ret %"struct.kc::impl_abstract_phylum"* undef
+}
+
+define void @_ZN2kc30impl_withcaseinfo_WithcaseinfoD0Ev(%"struct.kc::impl_withcaseinfo_Withcaseinfo"* %this) nounwind align 2 {
+entry:
+  unreachable
+}
+
+define void @_ZN2kc30impl_withcaseinfo_WithcaseinfoC1EPNS_26impl_patternrepresentationES2_PNS_10impl_CtextE(%"struct.kc::impl_withcaseinfo_Withcaseinfo"* %this, %"struct.kc::impl_outmostpatterns"* %_patternrepresentation_1, %"struct.kc::impl_outmostpatterns"* %_patternrepresentation_2, %"struct.kc::impl_Ctext"* %_Ctext_1) nounwind align 2 {
+entry:
+  unreachable
+}
+
+define void @_ZN2kc21impl_rewriteviewsinfoC2EPNS_20impl_rewriteviewinfoEPS0_(%"struct.kc::impl_CexpressionDQ"* %this, %"struct.kc::impl_Ccode_option"* %p1, %"struct.kc::impl_CexpressionDQ"* %p2) nounwind align 2 {
+entry:
+  unreachable
+}
+
+define %"struct.kc::impl_Ctext_elem"* @_ZN2kc11phylum_castIPNS_9impl_termENS_20impl_abstract_phylumEEET_PT0_(%"struct.kc::impl_abstract_phylum"* %t) nounwind {
+entry:
+  unreachable
+}
+
+define void @_ZN2kc27impl_ac_parameter_type_listD2Ev(%"struct.kc::impl_Ccode_option"* %this) nounwind align 2 {
+entry:
+  ret void
+}
+
+define void @_ZN2kc21impl_ac_operator_nameD2Ev(%"struct.kc::impl_Ctext_elem"* %this) nounwind align 2 {
+entry:
+  ret void
+}
+
+declare %"struct.kc::impl_casestring__Str"* @_ZN2kc12mkcasestringEPKci()

Added: llvm/trunk/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/2013-01-10-MergeFuncAssert.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt -mergefunc -disable-output < %s
+; This used to trigger a ConstantExpr::getBitCast assertion.
+
+define void @t1() unnamed_addr uwtable ssp align 2 {
+entry:
+  switch i32 undef, label %sw.bb12 [
+    i32 127, label %sw.bb
+    i32 126, label %sw.bb4
+  ]
+
+sw.bb:                                            ; preds = %entry
+  unreachable
+
+sw.bb4:                                           ; preds = %entry
+  unreachable
+
+sw.bb12:                                          ; preds = %entry
+  ret void
+}
+
+define void @t2() unnamed_addr uwtable ssp align 2 {
+entry:
+  switch i32 undef, label %sw.bb8 [
+    i32 4, label %sw.bb
+    i32 3, label %sw.bb4
+  ]
+
+sw.bb:                                            ; preds = %entry
+  unreachable
+
+sw.bb4:                                           ; preds = %entry
+  ret void
+
+sw.bb8:                                           ; preds = %entry
+  unreachable
+}

Added: llvm/trunk/test/Transforms/MergeFunc/address-spaces.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/address-spaces.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/address-spaces.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/address-spaces.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+target datalayout = "p:32:32:32-p1:32:32:32-p2:16:16:16"
+
+declare void @foo(i32) nounwind
+
+; None of these functions should be merged
+
+define i32 @store_as0(i32* %x) {
+; CHECK-LABEL: @store_as0(
+; CHECK: call void @foo(
+  %gep = getelementptr i32, i32* %x, i32 4
+  %y = load i32, i32* %gep
+  call void @foo(i32 %y) nounwind
+  ret i32 %y
+}
+
+define i32 @store_as1(i32 addrspace(1)* %x) {
+; CHECK-LABEL: @store_as1(
+; CHECK: call void @foo(
+  %gep = getelementptr i32, i32 addrspace(1)* %x, i32 4
+  %y = load i32, i32 addrspace(1)* %gep
+  call void @foo(i32 %y) nounwind
+  ret i32 %y
+}
+
+define i32 @store_as2(i32 addrspace(2)* %x) {
+; CHECK-LABEL: @store_as2(
+; CHECK: call void @foo(
+  %gep = getelementptr i32, i32 addrspace(2)* %x, i32 4
+  %y = load i32, i32 addrspace(2)* %gep
+  call void @foo(i32 %y) nounwind
+  ret i32 %y
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/alias.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/alias.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/alias.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/alias.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,116 @@
+; RUN: opt -S -mergefunc -mergefunc-use-aliases < %s | FileCheck %s
+
+; Aliases should always be created for the weak functions, and
+; for external functions if there is no local function
+
+; CHECK: @external_external_2 = unnamed_addr alias void (float*), bitcast (void (i32*)* @external_external_1 to void (float*)*)
+; CHECK: @weak_weak_2 = weak unnamed_addr alias void (float*), bitcast (void (i32*)* @0 to void (float*)*)
+; CHECK: @weak_weak_1 = weak unnamed_addr alias void (i32*), void (i32*)* @0
+; CHECK: @weak_external_1 = weak unnamed_addr alias void (i32*), bitcast (void (float*)* @weak_external_2 to void (i32*)*)
+; CHECK: @external_weak_2 = weak unnamed_addr alias void (float*), bitcast (void (i32*)* @external_weak_1 to void (float*)*)
+; CHECK: @weak_internal_1 = weak unnamed_addr alias void (i32*), bitcast (void (float*)* @weak_internal_2 to void (i32*)*)
+; CHECK: @internal_weak_2 = weak unnamed_addr alias void (float*), bitcast (void (i32*)* @internal_weak_1 to void (float*)*)
+
+; A strong backing function had to be created for the weak-weak pair
+
+; CHECK: define private void @0(i32* %a) unnamed_addr
+; CHECK-NEXT: call void @dummy4()
+
+; These internal functions are dropped in favor of the external ones
+
+; CHECK-NOT: define internal void @external_internal_2(float *%a) unnamed_addr
+; CHECK-NOT: define internal void @internal_external_1(i32 *%a) unnamed_addr
+; CHECK-NOT: define internal void @internal_external_1(i32 *%a) unnamed_addr
+; CHECK-NOT: define internal void @internal_external_2(float *%a) unnamed_addr
+
+; Only used to mark which functions should be merged.
+declare void @dummy1()
+declare void @dummy2()
+declare void @dummy3()
+declare void @dummy4()
+declare void @dummy5()
+declare void @dummy6()
+declare void @dummy7()
+declare void @dummy8()
+declare void @dummy9()
+
+define void @external_external_1(i32 *%a) unnamed_addr {
+  call void @dummy1()
+  ret void
+}
+define void @external_external_2(float *%a) unnamed_addr {
+  call void @dummy1()
+  ret void
+}
+
+define void @external_internal_1(i32 *%a) unnamed_addr {
+  call void @dummy2()
+  ret void
+}
+define internal void @external_internal_2(float *%a) unnamed_addr {
+  call void @dummy2()
+  ret void
+}
+
+define internal void @internal_external_1(i32 *%a) unnamed_addr {
+  call void @dummy3()
+  ret void
+}
+define void @internal_external_2(float *%a) unnamed_addr {
+  call void @dummy3()
+  ret void
+}
+
+define weak void @weak_weak_1(i32 *%a) unnamed_addr {
+  call void @dummy4()
+  ret void
+}
+define weak void @weak_weak_2(float *%a) unnamed_addr {
+  call void @dummy4()
+  ret void
+}
+
+define weak void @weak_external_1(i32 *%a) unnamed_addr {
+  call void @dummy5()
+  ret void
+}
+define external void @weak_external_2(float *%a) unnamed_addr {
+  call void @dummy5()
+  ret void
+}
+
+define external void @external_weak_1(i32 *%a) unnamed_addr {
+  call void @dummy6()
+  ret void
+}
+define weak void @external_weak_2(float *%a) unnamed_addr {
+  call void @dummy6()
+  ret void
+}
+
+define weak void @weak_internal_1(i32 *%a) unnamed_addr {
+  call void @dummy7()
+  ret void
+}
+define internal void @weak_internal_2(float *%a) unnamed_addr {
+  call void @dummy7()
+  ret void
+}
+
+define internal void @internal_weak_1(i32 *%a) unnamed_addr {
+  call void @dummy8()
+  ret void
+}
+define weak void @internal_weak_2(float *%a) unnamed_addr {
+  call void @dummy8()
+  ret void
+}
+
+define internal void @internal_internal_1(i32 *%a) unnamed_addr {
+  call void @dummy9()
+  ret void
+}
+define internal void @internal_internal_2(float *%a) unnamed_addr {
+  call void @dummy9()
+  ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/alloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/alloca.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/alloca.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/alloca.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+
+;; Make sure that two different allocas are not treated as equal.
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+
+%kv1 = type { i32, i32 }
+%kv2 = type { i8 }
+%kv3 = type { i64, i64 }
+
+; Size difference.
+
+; CHECK-LABEL: define void @size1
+; CHECK-NOT: call void @
+define void @size1(i8 *%f) {
+  %v = alloca %kv1, align 8
+  %f_2 = bitcast i8* %f to void (%kv1 *)*
+  call void %f_2(%kv1 * %v)
+  call void %f_2(%kv1 * %v)
+  call void %f_2(%kv1 * %v)
+  call void %f_2(%kv1 * %v)
+  ret void
+}
+
+; CHECK-LABEL: define void @size2
+; CHECK-NOT: call void @
+define void @size2(i8 *%f) {
+  %v = alloca %kv2, align 8
+  %f_2 = bitcast i8* %f to void (%kv2 *)*
+  call void %f_2(%kv2 * %v)
+  call void %f_2(%kv2 * %v)
+  call void %f_2(%kv2 * %v)
+  call void %f_2(%kv2 * %v)
+  ret void
+}
+
+; Alignment difference.
+
+; CHECK-LABEL: define void @align1
+; CHECK-NOT: call void @
+define void @align1(i8 *%f) {
+  %v = alloca %kv3, align 8
+  %f_2 = bitcast i8* %f to void (%kv3 *)*
+  call void %f_2(%kv3 * %v)
+  call void %f_2(%kv3 * %v)
+  call void %f_2(%kv3 * %v)
+  call void %f_2(%kv3 * %v)
+  ret void
+}
+
+; CHECK-LABEL: define void @align2
+; CHECK-NOT: call void @
+define void @align2(i8 *%f) {
+  %v = alloca %kv3, align 16
+  %f_2 = bitcast i8* %f to void (%kv3 *)*
+  call void %f_2(%kv3 * %v)
+  call void %f_2(%kv3 * %v)
+  call void %f_2(%kv3 * %v)
+  call void %f_2(%kv3 * %v)
+  ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/apply_function_attributes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/apply_function_attributes.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/apply_function_attributes.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/apply_function_attributes.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+%Opaque_type = type opaque
+%S2i = type <{ i64, i64 }>
+%D2i = type <{ i64, i64 }>
+%Di = type <{ i32 }>
+%Si = type <{ i32 }>
+
+define void @B(%Opaque_type* sret %a, %S2i* %b, i32* %xp, i32* %yp) {
+  %x = load i32, i32* %xp
+  %y = load i32, i32* %yp
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %y
+  %sum3 = add i32 %sum2, %y
+  ret void
+}
+
+define void @C(%Opaque_type* sret %a, %S2i* %b, i32* %xp, i32* %yp) {
+  %x = load i32, i32* %xp
+  %y = load i32, i32* %yp
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %y
+  %sum3 = add i32 %sum2, %y
+  ret void
+}
+
+define void @A(%Opaque_type* sret %a, %D2i* %b, i32* %xp, i32* %yp) {
+  %x = load i32, i32* %xp
+  %y = load i32, i32* %yp
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %y
+  %sum3 = add i32 %sum2, %y
+  ret void
+}
+
+; Make sure we transfer the parameter attributes to the call site.
+; CHECK-LABEL: define void @C(%Opaque_type* sret
+; CHECK:  tail call void bitcast (void (%Opaque_type*, %D2i*, i32*, i32*)* @A to void (%Opaque_type*, %S2i*, i32*, i32*)*)(%Opaque_type* sret %0, %S2i* %1, i32* %2, i32* %3)
+; CHECK:  ret void
+
+
+; Make sure we transfer the parameter attributes to the call site.
+; CHECK-LABEL: define void @B(%Opaque_type* sret
+; CHECK:  %5 = bitcast
+; CHECK:  tail call void @A(%Opaque_type* sret %0, %D2i* %5, i32* %2, i32* %3)
+; CHECK:  ret void
+

Added: llvm/trunk/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,92 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+
+define i8 @call_with_range() {
+  bitcast i8 0 to i8 ; dummy to make the function large enough
+  %out = call i8 @dummy(), !range !0
+  ret i8 %out
+}
+
+define i8 @call_no_range() {
+; CHECK-LABEL: @call_no_range
+; CHECK-NEXT: bitcast i8 0 to i8
+; CHECK-NEXT: %out = call i8 @dummy()
+; CHECK-NEXT: ret i8 %out
+  bitcast i8 0 to i8
+  %out = call i8 @dummy()
+  ret i8 %out
+}
+
+define i8 @call_different_range() {
+; CHECK-LABEL: @call_different_range
+; CHECK-NEXT: bitcast i8 0 to i8
+; CHECK-NEXT: %out = call i8 @dummy(), !range !1
+; CHECK-NEXT: ret i8 %out
+  bitcast i8 0 to i8
+  %out = call i8 @dummy(), !range !1
+  ret i8 %out
+}
+
+define i8 @invoke_with_range() personality i8* undef {
+  %out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0
+
+next:
+  ret i8 %out
+
+lpad:
+  %pad = landingpad { i8*, i32 } cleanup
+  resume { i8*, i32 } zeroinitializer
+}
+
+define i8 @invoke_no_range() personality i8* undef {
+; CHECK-LABEL: @invoke_no_range()
+; CHECK-NEXT: invoke i8 @dummy
+  %out = invoke i8 @dummy() to label %next unwind label %lpad
+
+next:
+  ret i8 %out
+
+lpad:
+  %pad = landingpad { i8*, i32 } cleanup
+  resume { i8*, i32 } zeroinitializer
+}
+
+define i8 @invoke_different_range() personality i8* undef {
+; CHECK-LABEL: @invoke_different_range()
+; CHECK-NEXT: invoke i8 @dummy
+  %out = invoke i8 @dummy() to label %next unwind label %lpad, !range !1
+
+next:
+  ret i8 %out
+
+lpad:
+  %pad = landingpad { i8*, i32 } cleanup
+  resume { i8*, i32 } zeroinitializer
+}
+
+define i8 @invoke_with_same_range() personality i8* undef {
+; CHECK-LABEL: @invoke_with_same_range()
+; CHECK: tail call i8 @invoke_with_range()
+  %out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0
+
+next:
+  ret i8 %out
+
+lpad:
+  %pad = landingpad { i8*, i32 } cleanup
+  resume { i8*, i32 } zeroinitializer
+}
+
+define i8 @call_with_same_range() {
+; CHECK-LABEL: @call_with_same_range
+; CHECK: tail call i8 @call_with_range
+  bitcast i8 0 to i8
+  %out = call i8 @dummy(), !range !0
+  ret i8 %out
+}
+
+
+declare i8 @dummy();
+declare i32 @__gxx_personality_v0(...)
+
+!0 = !{i8 0, i8 2}
+!1 = !{i8 5, i8 7}

Added: llvm/trunk/test/Transforms/MergeFunc/constant-entire-value.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/constant-entire-value.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/constant-entire-value.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/constant-entire-value.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; RUN: opt -S -mergefunc < %s | FileCheck -check-prefix=NOPLUS %s
+
+; This makes sure that zeros in constants don't cause problems with string based
+; memory comparisons
+define internal i32 @sum(i32 %x, i32 %y) {
+; CHECK-LABEL: @sum
+  %sum = add i32 %x, %y
+  %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 2 ], 2
+  %sum2 = add i32 %sum, %1
+  %sum3 = add i32 %sum2, %y
+  ret i32 %sum3
+}
+
+define internal i32 @add(i32 %x, i32 %y) {
+; CHECK-LABEL: @add
+  %sum = add i32 %x, %y
+  %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 1 ], 2
+  %sum2 = add i32 %sum, %1
+  %sum3 = add i32 %sum2, %y
+  ret i32 %sum3
+}
+
+define internal i32 @plus(i32 %x, i32 %y) {
+; NOPLUS-NOT: @plus
+  %sum = add i32 %x, %y
+  %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 5 ], 2
+  %sum2 = add i32 %sum, %1
+  %sum3 = add i32 %sum2, %y
+  ret i32 %sum3
+}
+
+define internal i32 @next(i32 %x, i32 %y) {
+; CHECK-LABEL: @next
+  %sum = add i32 %x, %y
+  %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 5 ], 2
+  %sum2 = add i32 %sum, %1
+  %sum3 = add i32 %sum2, %y
+  ret i32 %sum3
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/crash.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt -mergefunc -disable-output < %s
+; PR15185
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+%.qux.2496 = type { i32, %.qux.2497 }
+%.qux.2497 = type { i8, i32 }
+%.qux.2585 = type { i32, i32, i8* }
+
+@g2 = external unnamed_addr constant [9 x i8], align 1
+@g3 = internal unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
+
+define internal i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
+  br label %1
+
+; <label>:1
+  br label %2
+
+; <label>:2
+  ret i32 undef
+}
+
+define internal i32 @func10(%.qux.2496* nocapture %this) align 2 {
+  %1 = getelementptr inbounds %.qux.2496, %.qux.2496* %this, i32 0, i32 1, i32 1
+  %2 = load i32, i32* %1, align 4
+  ret i32 %2
+}
+
+define internal i8* @func29(i32* nocapture %this) align 2 {
+  ret i8* getelementptr inbounds ([9 x i8], [9 x i8]* @g2, i32 0, i32 0)
+}
+
+define internal i32* @func33(%.qux.2585* nocapture %this) align 2 {
+  ret i32* undef
+}
+
+define internal i32* @func34(%.qux.2585* nocapture %this) align 2 {
+  %1 = getelementptr inbounds %.qux.2585, %.qux.2585* %this, i32 0
+  ret i32* undef
+}
+
+define internal i8* @func35(%.qux.2585* nocapture %this) align 2 {
+  %1 = getelementptr inbounds %.qux.2585, %.qux.2585* %this, i32 0, i32 2
+  %2 = load i8*, i8** %1, align 4
+  ret i8* %2
+}

Added: llvm/trunk/test/Transforms/MergeFunc/crash2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/crash2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/crash2.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/crash2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,54 @@
+; RUN: opt %s -mergefunc -globalopt -S -o - | FileCheck %s
+
+; Make sure we don't crash on this example. This test is supposed to test that
+; MergeFunctions clears its GlobalNumbers value map. If this map still contains
+; entries when running globalopt and the MergeFunctions instance is still alive
+; the optimization of @G would cause an assert because globalopt would do an
+; RAUW on @G which still exists as an entry in the GlobalNumbers ValueMap which
+; causes an assert in the ValueHandle call back because we are RAUWing with a
+; different type (AllocaInst) than its key type (GlobalValue).
+
+@G = internal global i8** null
+@G2 = internal global i8** null
+
+define i32 @main(i32 %argc, i8** %argv) norecurse {
+; CHECK: alloca
+  store i8** %argv, i8*** @G
+  ret i32 0
+}
+
+define internal i8** @dead1(i64 %p) {
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  %tmp = load i8**, i8*** @G
+  ret i8** %tmp
+}
+
+define internal i8** @dead2(i64 %p) {
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  %tmp = load i8**, i8*** @G2
+  ret i8** %tmp
+}
+
+define void @left(i64 %p) {
+entry-block:
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  ret void
+}
+
+define void @right(i64 %p) {
+entry-block:
+  call void @left(i64 %p)
+  call void @left(i64 %p)
+  call void @left(i64 %p)
+  call void @left(i64 %p)
+  ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/external-before-local.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/external-before-local.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/external-before-local.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/external-before-local.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; We should normalize to test2 rather than test1,
+; because it allows us to drop test1 entirely
+
+; CHECK-NOT: define internal void @test1() unnamed_addr
+; CHECK: define void @test3() unnamed_addr
+; CHECK-NEXT: call void @test2()
+; CHECK-NEXT: call void @test2()
+  
+declare void @dummy()
+
+define internal void @test1() unnamed_addr {
+    call void @dummy()
+    call void @dummy()
+    ret void
+}
+
+define void @test2() unnamed_addr {
+    call void @dummy()
+    call void @dummy()
+    ret void
+}
+
+define void @test3() unnamed_addr {
+    call void @test1()
+    call void @test2()
+    ret void
+}
+
+; We should normalize to the existing test6 rather than
+; to a new anonymous strong backing function
+
+; CHECK: define weak void @test5()
+; CHECK-NEXT: tail call void @test6()
+; CHECK: define weak void @test4()
+; CHECK-NEXT: tail call void @test6()
+
+declare void @dummy2()
+  
+define weak void @test4() {
+    call void @dummy2()
+    call void @dummy2()
+    ret void
+}
+define weak void @test5() {
+    call void @dummy2()
+    call void @dummy2()
+    ret void
+}
+define void @test6() {
+    call void @dummy2()
+    call void @dummy2()
+    ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/fold-weak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/fold-weak.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/fold-weak.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/fold-weak.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+define weak i32 @sum(i32 %x, i32 %y) {
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %y
+  %sum3 = add i32 %sum2, %y
+  ret i32 %sum3
+}
+
+define weak i32 @add(i32 %x, i32 %y) {
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %y
+  %sum3 = add i32 %sum2, %y
+  ret i32 %sum3
+}
+
+; Don't replace a weak function use by another equivalent function. We don't
+; know whether the symbol that will ultimately be linked is equivalent (we
+; don't know that the weak definition is the definitive definition or whether it
+; will be overridden by a stronger definition).
+
+; CHECK-LABEL: define private i32 @0
+; CHECK: add i32
+; CHECK: add i32
+; CHECK: add i32
+; CHECK: ret
+
+; CHECK-LABEL: define i32 @use_weak
+; CHECK: call i32 @add
+; CHECK: call i32 @sum
+; CHECK: ret
+
+; CHECK-LABEL: define weak i32 @sum
+; CHECK:  tail call i32 @0
+; CHECK:  ret
+
+; CHECK-LABEL: define weak i32 @add
+; CHECK:  tail call i32 @0
+; CHECK:  ret
+
+
+define i32 @use_weak(i32 %a, i32 %b) {
+  %res = call i32 @add(i32 %a, i32 %b)
+  %res2 = call i32 @sum(i32 %a, i32 %b)
+  %res3 = add i32 %res, %res2
+  ret i32 %res3
+}

Added: llvm/trunk/test/Transforms/MergeFunc/functions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/functions.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/functions.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/functions.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; Be sure we don't merge cross-referenced functions of same type.
+
+; CHECK-LABEL: @left
+; CHECK-LABEL: entry-block
+; CHECK-LABEL: call void @right(i64 %p)
+define void @left(i64 %p) {
+entry-block:
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  call void @right(i64 %p)
+  ret void
+}
+
+; CHECK-LABEL: @right
+; CHECK-LABEL: entry-block
+; CHECK-LABEL: call void @left(i64 %p)
+define void @right(i64 %p) {
+entry-block:
+  call void @left(i64 %p)
+  call void @left(i64 %p)
+  call void @left(i64 %p)
+  call void @left(i64 %p)
+  ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/gep-base-type.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/gep-base-type.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/gep-base-type.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/gep-base-type.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; These should not be merged, the type of the GEP pointer argument does not have
+; the same stride.
+
+%"struct1" = type <{ i8*, i32, [4 x i8] }>
+%"struct2" = type { i8*, { i64, i64 } }
+
+define internal %struct2* @Ffunc(%struct2* %P, i64 %i) {
+; CHECK-LABEL: @Ffunc(
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: ret
+  %1 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+  %2 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+  %3 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+  %4 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+  %5 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+  %6 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+  ret %struct2* %6
+}
+
+
+define internal %struct1* @Gfunc(%struct1* %P, i64 %i) {
+; CHECK-LABEL: @Gfunc(
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: ret
+  %1 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+  %2 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+  %3 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+  %4 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+  %5 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+  %6 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+  ret %struct1* %6
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/inline-asm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/inline-asm.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/inline-asm.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/inline-asm.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+
+; CHECK-LABEL: @int_ptr_arg_different
+; CHECK-NEXT: call void asm
+
+; CHECK-LABEL: @int_ptr_null
+; CHECK-NEXT: tail call void @float_ptr_null()
+
+; CHECK-LABEL: @int_ptr_arg_same
+; CHECK-NEXT: %2 = bitcast i32* %0 to float*
+; CHECK-NEXT: tail call void @float_ptr_arg_same(float* %2)
+
+; Used to satisfy minimum size limit
+declare void @stuff()
+
+; Can be merged
+define void @float_ptr_null() {
+  call void asm "nop", "r"(float* null)
+  call void @stuff()
+  ret void
+}
+
+define void @int_ptr_null() {
+  call void asm "nop", "r"(i32* null)
+  call void @stuff()
+  ret void
+}
+
+; Can be merged (uses same argument differing by pointer type)
+define void @float_ptr_arg_same(float*) {
+  call void asm "nop", "r"(float* %0)
+  call void @stuff()
+  ret void
+}
+
+define void @int_ptr_arg_same(i32*) {
+  call void asm "nop", "r"(i32* %0)
+  call void @stuff()
+  ret void
+}
+
+; Can not be merged (uses different arguments)
+define void @float_ptr_arg_different(float*, float*) {
+  call void asm "nop", "r"(float* %0)
+  call void @stuff()
+  ret void
+}
+
+define void @int_ptr_arg_different(i32*, i32*) {
+  call void asm "nop", "r"(i32* %1)
+  call void @stuff()
+  ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/inttoptr-address-space.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/inttoptr-address-space.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/inttoptr-address-space.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/inttoptr-address-space.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-n8:16:32-S128"
+
+%.qux.2496 = type { i32, %.qux.2497 }
+%.qux.2497 = type { i8, i32 }
+%.qux.2585 = type { i32, i32, i8* }
+
+@g2 = external addrspace(1) constant [9 x i8], align 1
+@g3 = internal unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585 addrspace(1)*)* @func35 to i8*)]
+
+
+define internal i32 @func10(%.qux.2496 addrspace(1)* nocapture %this) align 2 {
+bb:
+  %tmp = getelementptr inbounds %.qux.2496, %.qux.2496 addrspace(1)* %this, i32 0, i32 1, i32 1
+  %tmp1 = load i32, i32 addrspace(1)* %tmp, align 4
+  ret i32 %tmp1
+}
+
+; Check for pointer bitwidth equal assertion failure
+define internal i8* @func35(%.qux.2585 addrspace(1)* nocapture %this) align 2 {
+bb:
+; CHECK-LABEL: @func35(
+; CHECK: %[[V2:.+]] = bitcast %.qux.2585 addrspace(1)* %{{.*}} to %.qux.2496 addrspace(1)*
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496 addrspace(1)* nocapture %[[V2]])
+; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
+  %tmp = getelementptr inbounds %.qux.2585, %.qux.2585 addrspace(1)* %this, i32 0, i32 2
+  %tmp1 = load i8*, i8* addrspace(1)* %tmp, align 4
+  ret i8* %tmp1
+}

Added: llvm/trunk/test/Transforms/MergeFunc/inttoptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/inttoptr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/inttoptr.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/inttoptr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; PR15185
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+%.qux.2496 = type { i32, %.qux.2497 }
+%.qux.2497 = type { i8, i32 }
+%.qux.2585 = type { i32, i32, i8* }
+
+@g2 = external unnamed_addr constant [9 x i8], align 1
+@g3 = internal unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
+
+define internal i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1
+  ret i32 undef
+}
+
+define internal i32 @func10(%.qux.2496* nocapture %this) align 2 {
+bb:
+  %tmp = getelementptr inbounds %.qux.2496, %.qux.2496* %this, i32 0, i32 1, i32 1
+  %tmp1 = load i32, i32* %tmp, align 4
+  ret i32 %tmp1
+}
+
+define internal i8* @func29(i32* nocapture %this) align 2 {
+bb:
+  ret i8* getelementptr inbounds ([9 x i8], [9 x i8]* @g2, i32 0, i32 0)
+}
+
+define internal i32* @func33(%.qux.2585* nocapture %this) align 2 {
+bb:
+  ret i32* undef
+}
+
+define internal i32* @func34(%.qux.2585* nocapture %this) align 2 {
+bb:
+  %tmp = getelementptr inbounds %.qux.2585, %.qux.2585* %this, i32 0
+  ret i32* undef
+}
+
+define internal i8* @func35(%.qux.2585* nocapture %this) align 2 {
+bb:
+; CHECK-LABEL: @func35(
+; CHECK: %[[V2:.+]] = bitcast %.qux.2585* %{{.*}} to %.qux.2496*
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* nocapture %[[V2]])
+; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
+  %tmp = getelementptr inbounds %.qux.2585, %.qux.2585* %this, i32 0, i32 2
+  %tmp1 = load i8*, i8** %tmp, align 4
+  ret i8* %tmp1
+}

Added: llvm/trunk/test/Transforms/MergeFunc/linkonce_odr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/linkonce_odr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/linkonce_odr.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/linkonce_odr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s -implicit-check-not=funC
+
+; Replacements should be totally ordered on the function name.
+; If we don't do this we can end up with one module defining a thunk for @funA
+; and another module defining a thunk for @funB.
+;
+; The problem with this is that the linker could then choose one stub from
+; each of the two modules and we end up with two stubs calling each other.
+
+; CHECK-LABEL: define linkonce_odr i32 @funA
+; CHECK-NEXT:    add
+; CHECK:         ret
+
+; CHECK-LABEL: define linkonce_odr i32 @funB
+; CHECK-NEXT:    tail call i32 @funA(i32 %0, i32 %1)
+; CHECK-NEXT:    ret
+
+define linkonce_odr i32 @funC(i32 %x, i32 %y) {
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %x, %sum
+  %sum3 = add i32 %x, %sum2
+  ret i32 %sum3
+}
+
+define linkonce_odr i32 @funB(i32 %x, i32 %y) {
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %x, %sum
+  %sum3 = add i32 %x, %sum2
+  ret i32 %sum3
+}
+
+define linkonce_odr i32 @funA(i32 %x, i32 %y) {
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %x, %sum
+  %sum3 = add i32 %x, %sum2
+  ret i32 %sum3
+}
+
+; This creates a use of @funB, preventing -mergefunc from deleting it.
+; @funC, however, can safely be deleted as it has no uses, and is discardable
+; if unused.
+@take_addr_of_funB = global i8* bitcast (i32 (i32, i32)* @funB to i8*)

Added: llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/merge-block-address-other-function.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 3, i32* %retval
+  br label %return
+
+if.end:
+  %1 = load i32, i32* %i.addr, align 4
+  %cmp1 = icmp eq i32 %1, 3
+  br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+  store i32 56, i32* %retval
+  br label %return
+
+if.end.3:
+  store i32 0, i32* %retval
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval
+  ret i32 %2
+}
+
+
+define internal i8* @Afunc(i32* %P) {
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-NOT: @Bfunc
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then.2)
+}

Added: llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/merge-block-address.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,91 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; These two functions are identical. The basic block labels are the same, and
+; induce the same CFG. We are testing that block addresses within different
+; functions are compared by their value, and not based on order. Both functions
+; come from the same C-code, but in the first the two val_0/val_1 basic blocks
+; are in a different order (they were manually switched post-compilation).
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+  store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: tail call i32 @_Z1fi
+; CHECK-NEXT: ret
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+  store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; RUN: opt -mergefunc -S < %s | FileCheck -check-prefix=MERGE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Afunc and Bfunc differ only in that one returns i64, the other a pointer.
+; These should be merged.
+define internal i64 @Afunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64 @Afunc
+  store i32 4, i32* %P
+  store i32 6, i32* %Q
+  ret i64 0
+}
+
+define internal i64* @Bfunc(i32* %P, i32* %Q) {
+; MERGE-NOT: @Bfunc
+  store i32 4, i32* %P
+  store i32 6, i32* %Q
+  ret i64* null
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/merge-different-vector-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/merge-different-vector-types.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-different-vector-types.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/merge-different-vector-types.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; RUN: opt -mergefunc -S < %s | FileCheck -check-prefix=MERGE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Merging should still work even if the values are wrapped in a vector.
+define internal <2 x i64> @Mfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal <2 x i64> @Mfunc
+  store i32 1, i32* %P
+  store i32 1, i32* %Q
+  ret <2 x i64> <i64 0, i64 0>
+}
+
+define internal <2 x i64*> @Nfunc(i32* %P, i32* %Q) {
+; MERGE-NOT: @Nfunc
+  store i32 1, i32* %P
+  store i32 1, i32* %Q
+  ret <2 x i64*> <i64* null, i64* null>
+}

Added: llvm/trunk/test/Transforms/MergeFunc/merge-ptr-and-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/merge-ptr-and-int.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-ptr-and-int.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/merge-ptr-and-int.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+
+declare void @stuff()
+
+; CHECK-LABEL: @f0(
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+; CHECK-LABEL: @f1(
+; CHECK: ptrtoint i64*
+; CHECK: tail call void @f0(i64
+
+define void @f1(i64* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/merge-small-unnamed-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/merge-small-unnamed-addr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-small-unnamed-addr.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/merge-small-unnamed-addr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,14 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; CHECK-NOT: @b
+
+@x = constant { void ()*, void ()* } { void ()* @a, void ()* @b }
+; CHECK: { void ()* @a, void ()* @a }
+
+define internal void @a() unnamed_addr {
+  ret void
+}
+
+define internal void @b() unnamed_addr {
+  ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/merge-unnamed-addr-bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/merge-unnamed-addr-bitcast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-unnamed-addr-bitcast.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/merge-unnamed-addr-bitcast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+%A = type { i32 }
+%B = type { i32 }
+
+; CHECK-NOT: @b
+
+@x = constant { i32 (i32)*, i32 (i32)* }
+  { i32 (i32)* bitcast (i32 (%A)* @a to i32 (i32)*),
+    i32 (i32)* bitcast (i32 (%B)* @b to i32 (i32)*) }
+; CHECK: { i32 (i32)* bitcast (i32 (%A)* @a to i32 (i32)*), i32 (i32)* bitcast (i32 (%A)* @a to i32 (i32)*) }
+
+define internal i32 @a(%A) unnamed_addr {
+  extractvalue %A %0, 0
+  xor i32 %2, 0
+  ret i32 %3
+}
+
+define internal i32 @b(%B) unnamed_addr {
+  extractvalue %B %0, 0
+  xor i32 %2, 0
+  ret i32 %3
+}
+
+define i32 @c(i32) {
+  insertvalue %B undef, i32 %0, 0
+  call i32 @b(%B %2)
+; CHECK: call i32 bitcast (i32 (%A)* @a to i32 (%B)*)(%B %2)
+  ret i32 %3
+}

Added: llvm/trunk/test/Transforms/MergeFunc/merge-unnamed-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/merge-unnamed-addr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-unnamed-addr.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/merge-unnamed-addr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; CHECK-NOT: @b
+
+@x = constant { i32 (i32)*, i32 (i32)* } { i32 (i32)* @a, i32 (i32)* @b }
+; CHECK: { i32 (i32)* @a, i32 (i32)* @a }
+
+define internal i32 @a(i32 %a) unnamed_addr {
+  %b = xor i32 %a, 0
+  %c = xor i32 %b, 0
+  ret i32 %c
+}
+
+define internal i32 @b(i32 %a) unnamed_addr {
+  %b = xor i32 %a, 0
+  %c = xor i32 %b, 0
+  ret i32 %c
+}

Added: llvm/trunk/test/Transforms/MergeFunc/merge-weak-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/merge-weak-crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/merge-weak-crash.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/merge-weak-crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; CHECK-LABEL: define i32 @func1
+; CHECK: call i32 @func2
+; CHECK: ret
+
+; CHECK-LABEL: define i32 @func2
+; CHECK: call i32 @unknown
+; CHECK: ret
+
+; CHECK-LABEL: define i32 @func4
+; CHECK: call i32 @func2
+; CHECK: ret
+
+; CHECK-LABEL: define weak i32 @func3_weak
+; CHECK: call i32 @func1
+; CHECK: ret
+
+define i32 @func1(i32 %x, i32 %y) {
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %y
+  %sum3 = call i32 @func4(i32 %sum, i32 %sum2)
+  ret i32 %sum3
+}
+
+define i32 @func4(i32 %x, i32 %y) {
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %y
+  %sum3 = call i32 @unknown(i32 %sum, i32 %sum2)
+  ret i32 %sum3
+}
+
+define weak i32 @func3_weak(i32 %x, i32 %y) {
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %y
+  %sum3 = call i32 @func2(i32 %sum, i32 %sum2)
+  ret i32 %sum3
+}
+
+define i32 @func2(i32 %x, i32 %y) {
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %y
+  %sum3 = call i32 @unknown(i32 %sum, i32 %sum2)
+  ret i32 %sum3
+}
+
+declare i32 @unknown(i32 %x, i32 %y)

Added: llvm/trunk/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,223 @@
+; RUN: opt -O0 -S -mergefunc -mergefunc-preserve-debug-info < %s | FileCheck %s --check-prefix=OPTIMIZATION_LEVEL_0
+; RUN: opt -O2 -S -mergefunc -mergefunc-preserve-debug-info < %s | FileCheck %s --check-prefix=OPTIMIZATION_LEVEL_2
+
+; Preserve debug info in thunks under -mergefunc -mergefunc-preserve-debug-info
+;
+; We test that:
+; At -O0 we have preserved the generated @llvm.dbg.declare debug intrinsics.
+; At -O2 we have preserved the generated @llvm.dbg.value debug intrinsics.
+; At -O0, stores from the incoming parameters to locations on the stack-frame
+;         and allocas that create these locations on the stack-frame are preserved.
+; Debug info got generated for the call made by the thunk and for its return value.
+; The foregoing is the only content of a thunk's entry block.
+; A thunk makes a tail call to the shared implementation.
+; A thunk's call site is preserved to point to the thunk (with only -mergefunc the
+;   call site is modified to point to the shared implementation) when both occur
+;   within the same translation unit.
+
+; The source code that was used to test and generate this LLVM IR is:
+;
+; int maxA(int x, int y) {
+;   int i, m, j;
+;   if (x > y)
+;     m = x;
+;   else
+;     m = y;
+;   return m;
+; }
+;
+; int maxB(int x, int y) {
+;   int i, m, j;
+;   if (x > y)
+;     m = x;
+;   else
+;     m = y;
+;   return m;
+; }
+;
+; void f(void) {
+;
+;   maxA(3, 4);
+;   maxB(1, 9);
+; }
+
+; Function Attrs: nounwind uwtable
+define i32 @maxA(i32 %x, i32 %y) !dbg !6 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %m = alloca i32, align 4
+  %j = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !11, metadata !12), !dbg !13
+  store i32 %y, i32* %y.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %y.addr, metadata !14, metadata !12), !dbg !15
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !17
+  call void @llvm.dbg.declare(metadata i32* %m, metadata !18, metadata !12), !dbg !19
+  call void @llvm.dbg.declare(metadata i32* %j, metadata !20, metadata !12), !dbg !21
+  %0 = load i32, i32* %x.addr, align 4, !dbg !22
+  %1 = load i32, i32* %y.addr, align 4, !dbg !24
+  %cmp = icmp sgt i32 %0, %1, !dbg !25
+  br i1 %cmp, label %if.then, label %if.else, !dbg !26
+
+if.then:                                          ; preds = %entry
+  %2 = load i32, i32* %x.addr, align 4, !dbg !27
+  store i32 %2, i32* %m, align 4, !dbg !28
+  br label %if.end, !dbg !29
+
+if.else:                                          ; preds = %entry
+  %3 = load i32, i32* %y.addr, align 4, !dbg !30
+  store i32 %3, i32* %m, align 4, !dbg !31
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %4 = load i32, i32* %m, align 4, !dbg !32
+  ret i32 %4, !dbg !33
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+; Function Attrs: nounwind uwtable
+define i32 @maxB(i32 %x, i32 %y) !dbg !34 {
+
+; OPTIMIZATION_LEVEL_0: define i32 @maxB(i32 %x, i32 %y)
+; OPTIMIZATION_LEVEL_0-NEXT: entry:
+; OPTIMIZATION_LEVEL_0-NEXT: %x.addr = alloca i32, align 4
+; OPTIMIZATION_LEVEL_0-NEXT: %y.addr = alloca i32, align 4
+; OPTIMIZATION_LEVEL_0-NEXT: store i32 %x, i32* %x.addr, align 4
+; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_0-NEXT: store i32 %y, i32* %y.addr, align 4
+; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata i32* %y.addr, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_0-NEXT: %0 = tail call i32 @maxA(i32 %x, i32 %y), !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_0-NEXT: ret i32 %0, !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_0-NEXT: }
+
+; OPTIMIZATION_LEVEL_2: define i32 @maxB(i32 %x, i32 %y)
+; OPTIMIZATION_LEVEL_2-NEXT: entry:
+; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %x, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %y, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_2-NEXT: %0 = tail call i32 @maxA(i32 %x, i32 %y) #{{[0-9]+}}, !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_2-NEXT: ret i32 %0, !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_2-NEXT: }
+
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %m = alloca i32, align 4
+  %j = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !35, metadata !12), !dbg !36
+  store i32 %y, i32* %y.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %y.addr, metadata !37, metadata !12), !dbg !38
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !39, metadata !12), !dbg !40
+  call void @llvm.dbg.declare(metadata i32* %m, metadata !41, metadata !12), !dbg !42
+  call void @llvm.dbg.declare(metadata i32* %j, metadata !43, metadata !12), !dbg !44
+  %0 = load i32, i32* %x.addr, align 4, !dbg !45
+  %1 = load i32, i32* %y.addr, align 4, !dbg !47
+  %cmp = icmp sgt i32 %0, %1, !dbg !48
+  br i1 %cmp, label %if.then, label %if.else, !dbg !49
+
+if.then:                                          ; preds = %entry
+  %2 = load i32, i32* %x.addr, align 4, !dbg !50
+  store i32 %2, i32* %m, align 4, !dbg !51
+  br label %if.end, !dbg !52
+
+if.else:                                          ; preds = %entry
+  %3 = load i32, i32* %y.addr, align 4, !dbg !53
+  store i32 %3, i32* %m, align 4, !dbg !54
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %4 = load i32, i32* %m, align 4, !dbg !55
+  ret i32 %4, !dbg !56
+}
+
+; Function Attrs: nounwind uwtable
+define void @f() !dbg !57 {
+entry:
+
+; OPTIMIZATION_LEVEL_0: define void @f()
+; OPTIMIZATION_LEVEL_0-NEXT: entry:
+; OPTIMIZATION_LEVEL_0-NEXT: %call = call i32 @maxA(i32 3, i32 4), !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_0-NEXT: %call1 = call i32 @maxB(i32 1, i32 9), !dbg !{{[0-9]+}}
+; OPTIMIZATION_LEVEL_0-NEXT: ret void, !dbg !{{[0-9]+}}
+
+; OPTIMIZATION_LEVEL_2: define void @f()
+; OPTIMIZATION_LEVEL_2-NEXT: entry:
+; OPTIMIZATION_LEVEL_2-NEXT: ret void, !dbg !{{[0-9]+}}
+
+  %call = call i32 @maxA(i32 3, i32 4), !dbg !60
+  %call1 = call i32 @maxB(i32 1, i32 9), !dbg !61
+  ret void, !dbg !62
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "mergefunc-preserve-debug-info.c", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!""}
+!6 = distinct !DISubprogram(name: "maxA", scope: !7, file: !7, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!7 = !DIFile(filename: "./mergefunc-preserve-debug-info.c", directory: "")
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !10, !10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "x", arg: 1, scope: !6, file: !7, line: 1, type: !10)
+!12 = !DIExpression()
+!13 = !DILocation(line: 1, column: 14, scope: !6)
+!14 = !DILocalVariable(name: "y", arg: 2, scope: !6, file: !7, line: 1, type: !10)
+!15 = !DILocation(line: 1, column: 21, scope: !6)
+!16 = !DILocalVariable(name: "i", scope: !6, file: !7, line: 2, type: !10)
+!17 = !DILocation(line: 2, column: 7, scope: !6)
+!18 = !DILocalVariable(name: "m", scope: !6, file: !7, line: 2, type: !10)
+!19 = !DILocation(line: 2, column: 10, scope: !6)
+!20 = !DILocalVariable(name: "j", scope: !6, file: !7, line: 2, type: !10)
+!21 = !DILocation(line: 2, column: 13, scope: !6)
+!22 = !DILocation(line: 3, column: 7, scope: !23)
+!23 = distinct !DILexicalBlock(scope: !6, file: !7, line: 3, column: 7)
+!24 = !DILocation(line: 3, column: 11, scope: !23)
+!25 = !DILocation(line: 3, column: 9, scope: !23)
+!26 = !DILocation(line: 3, column: 7, scope: !6)
+!27 = !DILocation(line: 4, column: 9, scope: !23)
+!28 = !DILocation(line: 4, column: 7, scope: !23)
+!29 = !DILocation(line: 4, column: 5, scope: !23)
+!30 = !DILocation(line: 6, column: 9, scope: !23)
+!31 = !DILocation(line: 6, column: 7, scope: !23)
+!32 = !DILocation(line: 7, column: 10, scope: !6)
+!33 = !DILocation(line: 7, column: 3, scope: !6)
+!34 = distinct !DISubprogram(name: "maxB", scope: !7, file: !7, line: 10, type: !8, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!35 = !DILocalVariable(name: "x", arg: 1, scope: !34, file: !7, line: 10, type: !10)
+!36 = !DILocation(line: 10, column: 14, scope: !34)
+!37 = !DILocalVariable(name: "y", arg: 2, scope: !34, file: !7, line: 10, type: !10)
+!38 = !DILocation(line: 10, column: 21, scope: !34)
+!39 = !DILocalVariable(name: "i", scope: !34, file: !7, line: 11, type: !10)
+!40 = !DILocation(line: 11, column: 7, scope: !34)
+!41 = !DILocalVariable(name: "m", scope: !34, file: !7, line: 11, type: !10)
+!42 = !DILocation(line: 11, column: 10, scope: !34)
+!43 = !DILocalVariable(name: "j", scope: !34, file: !7, line: 11, type: !10)
+!44 = !DILocation(line: 11, column: 13, scope: !34)
+!45 = !DILocation(line: 12, column: 7, scope: !46)
+!46 = distinct !DILexicalBlock(scope: !34, file: !7, line: 12, column: 7)
+!47 = !DILocation(line: 12, column: 11, scope: !46)
+!48 = !DILocation(line: 12, column: 9, scope: !46)
+!49 = !DILocation(line: 12, column: 7, scope: !34)
+!50 = !DILocation(line: 13, column: 9, scope: !46)
+!51 = !DILocation(line: 13, column: 7, scope: !46)
+!52 = !DILocation(line: 13, column: 5, scope: !46)
+!53 = !DILocation(line: 15, column: 9, scope: !46)
+!54 = !DILocation(line: 15, column: 7, scope: !46)
+!55 = !DILocation(line: 16, column: 10, scope: !34)
+!56 = !DILocation(line: 16, column: 3, scope: !34)
+!57 = distinct !DISubprogram(name: "f", scope: !7, file: !7, line: 19, type: !58, isLocal: false, isDefinition: true, scopeLine: 19, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!58 = !DISubroutineType(types: !59)
+!59 = !{null}
+!60 = !DILocation(line: 21, column: 3, scope: !57)
+!61 = !DILocation(line: 22, column: 3, scope: !57)
+!62 = !DILocation(line: 23, column: 1, scope: !57)

Added: llvm/trunk/test/Transforms/MergeFunc/mergefunc-struct-return.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/mergefunc-struct-return.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/mergefunc-struct-return.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/mergefunc-struct-return.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+
+; This test makes sure that the mergefunc pass uses extractvalue and insertvalue
+; to convert the struct result type, as struct types cannot be bitcast.
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+
+%kv1 = type { i32*, i32* }
+%kv2 = type { i8*, i8* }
+
+declare void @noop()
+
+define %kv1 @fn1() {
+; CHECK-LABEL: @fn1(
+  %tmp = alloca %kv1
+  %v1 = getelementptr %kv1, %kv1* %tmp, i32 0, i32 0
+  store i32* null, i32** %v1
+  %v2 = getelementptr %kv1, %kv1* %tmp, i32 0, i32 0
+  store i32* null, i32** %v2
+  call void @noop()
+  %v3 = load %kv1, %kv1* %tmp
+  ret %kv1 %v3
+}
+
+define %kv2 @fn2() {
+; CHECK-LABEL: @fn2(
+; CHECK: %1 = tail call %kv1 @fn1()
+; CHECK: %2 = extractvalue %kv1 %1, 0
+; CHECK: %3 = bitcast i32* %2 to i8*
+; CHECK: %4 = insertvalue %kv2 undef, i8* %3, 0
+  %tmp = alloca %kv2
+  %v1 = getelementptr %kv2, %kv2* %tmp, i32 0, i32 0
+  store i8* null, i8** %v1
+  %v2 = getelementptr %kv2, %kv2* %tmp, i32 0, i32 0
+  store i8* null, i8** %v2
+  call void @noop()
+
+  %v3 = load %kv2, %kv2* %tmp
+  ret %kv2 %v3
+}

Added: llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,96 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; There is a slight difference in these two functions, in that the label values
+; are switched. They are thus not mergeable. This tests that block addresses
+; referring to blocks within each respective compared function are correctly
+; ordered.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1fi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+; Right here, this is val_0, and later the if might assign val_1
+  store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+  %i.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %l = alloca i8*, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %ret, align 4
+; This time, we store val_1 initially, and later the if might assign val_0
+  store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+  %0 = load i32, i32* %i.addr, align 4
+  %and = and i32 %0, 256
+  %cmp = icmp eq i32 %and, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+  br label %if.end
+
+if.end:
+  %1 = load i8*, i8** %l, align 8
+  br label %indirectgoto
+
+val_0:
+  store i32 12, i32* %ret, align 4
+  br label %end
+
+val_1:
+  store i32 42, i32* %ret, align 4
+  br label %end
+
+end:
+  %2 = load i32, i32* %ret, align 4
+  ret i32 %2
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %1, %if.end ]
+  indirectbr i8* %indirect.goto.dest, [label %val_1, label %val_0]
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; We should not merge these two functions, because the blocks are different.
+; This tests the handling of block addresses from different functions.
+; ModuleID = '<stdin>'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+define internal i8* @Afunc(i32* %P) {
+; CHECK-LABEL: @Afunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-LABEL: @Bfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 1, i32* %P
+  store i32 3, i32* %P
+  ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 3, i32* %retval
+  br label %return
+
+if.end:
+  %1 = load i32, i32* %i.addr, align 4
+  %cmp1 = icmp eq i32 %1, 3
+  br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+  store i32 56, i32* %retval
+  br label %return
+
+if.end.3:
+  store i32 0, i32* %retval
+  br label %return
+
+return:
+  %2 = load i32, i32* %retval
+  ret i32 %2
+}

Added: llvm/trunk/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; These should not be merged, as the datalayout says a pointer is 64 bits. No
+; sext/zext is specified, so these functions could lower differently.
+define internal i32 @Ffunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i32 @Ffunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 1, i32* %P
+  store i32 3, i32* %Q
+  ret i32 0
+}
+
+define internal i64* @Gfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64* @Gfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 1, i32* %P
+  store i32 3, i32* %Q
+  ret i64* null
+}

Added: llvm/trunk/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; These should not be merged, as 1 != 0.
+define internal i64 @Ifunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64 @Ifunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 10, i32* %P
+  store i32 10, i32* %Q
+  ret i64 1
+}
+
+define internal i64* @Jfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64* @Jfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 10, i32* %P
+  store i32 10, i32* %Q
+  ret i64* null
+}

Added: llvm/trunk/test/Transforms/MergeFunc/nonzero-address-spaces.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/nonzero-address-spaces.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/nonzero-address-spaces.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/nonzero-address-spaces.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; MergeFunctions should respect the default function address
+; space specified in the data layout.
+
+target datalayout = "e-P1-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @stuff()
+
+; CHECK-LABEL: @f0(
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+; CHECK-LABEL: @f1(
+; CHECK: ptrtoint i64*
+; CHECK: tail call addrspace(1) void @f0(i64
+
+define void @f1(i64* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/phi-check-blocks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/phi-check-blocks.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/phi-check-blocks.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/phi-check-blocks.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; Ensure that we do not merge functions that are identical with the
+; exception of the order of the incoming blocks to a phi.
+
+; CHECK-LABEL: define linkonce_odr hidden i1 @first(i2)
+define linkonce_odr hidden i1 @first(i2) {
+entry:
+; CHECK: switch i2
+  switch i2 %0, label %default [
+    i2 0, label %L1
+    i2 1, label %L2
+    i2 -2, label %L3
+  ]
+default:
+  unreachable
+L1:
+  br label %done
+L2:
+  br label %done
+L3:
+  br label %done
+done:
+  %result = phi i1 [ true, %L1 ], [ false, %L2 ], [ false, %L3 ]
+; CHECK: ret i1
+  ret i1 %result
+}
+
+; CHECK-LABEL: define linkonce_odr hidden i1 @second(i2)
+define linkonce_odr hidden i1 @second(i2) {
+entry:
+; CHECK: switch i2
+  switch i2 %0, label %default [
+    i2 0, label %L1
+    i2 1, label %L2
+    i2 -2, label %L3
+  ]
+default:
+  unreachable
+L1:
+  br label %done
+L2:
+  br label %done
+L3:
+  br label %done
+done:
+  %result = phi i1 [ true, %L3 ], [ false, %L2 ], [ false, %L1 ]
+; CHECK: ret i1
+  ret i1 %result
+}

Added: llvm/trunk/test/Transforms/MergeFunc/phi-speculation1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/phi-speculation1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/phi-speculation1.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/phi-speculation1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; REQUIRES: asserts
+; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | not grep "functions merged"
+
+define i32 @foo1(i32 %x) {
+entry:
+  %A = add i32 %x, 1
+  %B = call i32 @foo1(i32 %A)
+  br label %loop
+loop:
+  %C = phi i32 [%B, %entry], [%D, %loop]
+  %D = add i32 %x, 2
+  %E = icmp ugt i32 %D, 10000
+  br i1 %E, label %loopexit, label %loop
+loopexit:
+  ret i32 %D
+}
+
+define i32 @foo2(i32 %x) {
+entry:
+  %0 = add i32 %x, 1
+  %1 = call i32 @foo2(i32 %0)
+  br label %loop
+loop:
+  %2 = phi i32 [%1, %entry], [%3, %loop]
+  %3 = add i32 %2, 2
+  %4 = icmp ugt i32 %3, 10000
+  br i1 %4, label %loopexit, label %loop
+loopexit:
+  ret i32 %3
+}

Added: llvm/trunk/test/Transforms/MergeFunc/phi-speculation2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/phi-speculation2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/phi-speculation2.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/phi-speculation2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; REQUIRES: asserts
+; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | grep "functions merged"
+
+define i32 @foo1(i32 %x) {
+entry:
+  %A = add i32 %x, 1
+  %B = call i32 @foo1(i32 %A)
+  br label %loop
+loop:
+  %C = phi i32 [%B, %entry], [%D, %loop]
+  %D = add i32 %C, 2
+  %E = icmp ugt i32 %D, 10000
+  br i1 %E, label %loopexit, label %loop
+loopexit:
+  ret i32 %D
+}
+
+define i32 @foo2(i32 %x) {
+entry:
+  %0 = add i32 %x, 1
+  %1 = call i32 @foo2(i32 %0)
+  br label %loop
+loop:
+  %2 = phi i32 [%1, %entry], [%3, %loop]
+  %3 = add i32 %2, 2
+  %4 = icmp ugt i32 %3, 10000
+  br i1 %4, label %loopexit, label %loop
+loopexit:
+  ret i32 %3
+}

Added: llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt -S -mergefunc < %s | not grep "functions merged"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @stuff()
+
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+define void @f2(i64 addrspace(1)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @stuff()
+
+define void @f0(i64 %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+; CHECK-LABEL: @f0
+; CHECK:  %2 = ptrtoint i64* %0 to i64
+; CHECK:  tail call void @f0(i64 %2)
+; CHECK:  ret void
+define void @f1(i64 addrspace(0)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt -S -mergefunc < %s | not grep "functions merged"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @stuff()
+
+define void @f0(i64 addrspace(0)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+
+define void @f2(i64 addrspace(1)* %p0) {
+entry:
+  call void @stuff()
+  call void @stuff()
+  call void @stuff()
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/ranges-multiple.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+define i1 @cmp_with_range(i8*, i8*) {
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_no_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_no_range
+; CHECK-NEXT: %v1 = load i8, i8* %0
+; CHECK-NEXT:  %v2 = load i8, i8* %1
+; CHECK-NEXT:  %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT:  ret i1 %out
+  %v1 = load i8, i8* %0
+  %v2 = load i8, i8* %1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_different_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_different_range
+; CHECK-NEXT:  %v1 = load i8, i8* %0, !range !1
+; CHECK-NEXT:  %v2 = load i8, i8* %1, !range !1
+; CHECK-NEXT:  %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT:  ret i1 %out
+  %v1 = load i8, i8* %0, !range !1
+  %v2 = load i8, i8* %1, !range !1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_with_same_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_with_same_range
+; CHECK: tail call i1 @cmp_with_range
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+; The comparison must check every element of the range, not just the first pair.
+!0 = !{i8 0, i8 2, i8 21, i8 30}
+!1 = !{i8 0, i8 2, i8 21, i8 25}

Added: llvm/trunk/test/Transforms/MergeFunc/ranges.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/ranges.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/ranges.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/ranges.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,43 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+define i1 @cmp_with_range(i8*, i8*) {
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_no_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_no_range
+; CHECK-NEXT: %v1 = load i8, i8* %0
+; CHECK-NEXT:  %v2 = load i8, i8* %1
+; CHECK-NEXT:  %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT:  ret i1 %out
+  %v1 = load i8, i8* %0
+  %v2 = load i8, i8* %1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_different_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_different_range
+; CHECK-NEXT:  %v1 = load i8, i8* %0, !range !1
+; CHECK-NEXT:  %v2 = load i8, i8* %1, !range !1
+; CHECK-NEXT:  %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT:  ret i1 %out
+  %v1 = load i8, i8* %0, !range !1
+  %v2 = load i8, i8* %1, !range !1
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+define i1 @cmp_with_same_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_with_same_range
+; CHECK: tail call i1 @cmp_with_range
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
+  %out = icmp eq i8 %v1, %v2
+  ret i1 %out
+}
+
+!0 = !{i8 0, i8 2}
+!1 = !{i8 5, i8 7}

Added: llvm/trunk/test/Transforms/MergeFunc/self-referential-global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/self-referential-global.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/self-referential-global.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/self-referential-global.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -mergefunc -disable-output < %s
+
+; A linked list type and simple payload
+%LL = type { %S, %LL* }
+%S = type { void (%S*, i32)* }
+
+; Table refers to itself via GEP
+@Table = internal global [3 x %LL] [%LL { %S { void (%S*, i32)* @B }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }, %LL { %S { void (%S*, i32)* @A }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }, %LL { %S { void (%S*, i32)* @A }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }], align 16
+
+; The body of this is irrelevant; it is long so that mergefunc doesn't skip it as a small function.
+define internal void @A(%S* %self, i32 %a) {
+  %1 = add i32 %a, 32
+  %2 = add i32 %1, 32
+  %3 = add i32 %2, 32
+  %4 = add i32 %3, 32
+  %5 = add i32 %4, 32
+  %6 = add i32 %5, 32
+  %7 = add i32 %6, 32
+  %8 = add i32 %7, 32
+  %9 = add i32 %8, 32
+  %10 = add i32 %9, 32
+  %11 = add i32 %10, 32
+  ret void
+}
+
+define internal void @B(%S* %self, i32 %a) {
+  %1 = add i32 %a, 32
+  %2 = add i32 %1, 32
+  %3 = add i32 %2, 32
+  %4 = add i32 %3, 32
+  %5 = add i32 %4, 32
+  %6 = add i32 %5, 32
+  %7 = add i32 %6, 32
+  %8 = add i32 %7, 32
+  %9 = add i32 %8, 32
+  %10 = add i32 %9, 32
+  %11 = add i32 %10, 32
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/tailcall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/tailcall.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/tailcall.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/tailcall.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+
+declare void @dummy()
+
+; CHECK-LABEL: define{{.*}}@foo
+; CHECK: call {{.*}}@dummy
+; CHECK: musttail {{.*}}@dummy
+define void @foo() {
+  call void @dummy()
+  musttail call void @dummy()
+  ret void
+}
+
+; CHECK-LABEL: define{{.*}}@bar
+; CHECK: call {{.*}}@dummy
+; CHECK: call {{.*}}@dummy
+define void @bar() {
+  call void @dummy()
+  call void @dummy()
+  ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/too-small.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/too-small.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/too-small.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/too-small.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,14 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+define void @foo(i32 %x) {
+; CHECK-LABEL: @foo(
+; CHECK-NOT: call
+  ret void
+}
+
+define void @bar(i32 %x) {
+; CHECK-LABEL: @bar(
+; CHECK-NOT: call
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/MergeFunc/undef-different-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/undef-different-types.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/undef-different-types.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/undef-different-types.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; RUN: opt -mergefunc -S < %s | FileCheck -check-prefix=MERGE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Cfunc and Dfunc differ only in that one returns i64, the other a pointer, and
+; both return undef. They should be merged. Note undef cannot be merged with
+; anything else, because this implies the ordering will be inconsistent (i.e.
+; -1 == undef and undef == 1, but -1 < 1, so we must have undef != <any int>).
+define internal i64 @Cfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64 @Cfunc
+  store i32 4, i32* %P
+  store i32 6, i32* %Q
+  ret i64 undef
+}
+
+define internal i64* @Dfunc(i32* %P, i32* %Q) {
+; MERGE-NOT: @Dfunc
+  store i32 4, i32* %P
+  store i32 6, i32* %Q
+  ret i64* undef
+}

Added: llvm/trunk/test/Transforms/MergeFunc/unnamed-addr-reprocessing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/unnamed-addr-reprocessing.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/unnamed-addr-reprocessing.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/unnamed-addr-reprocessing.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; After test3 and test4 have been merged, we should detect that
+; test1 and test2 can also be merged.
+
+; CHECK: define void @test4() unnamed_addr
+; CHECK-NEXT: tail call void @test3()
+; CHECK: define void @test2() unnamed_addr
+; CHECK-NEXT: tail call void @test1()
+
+declare void @dummy()
+  
+define void @test1() unnamed_addr {
+    call void @test3()
+    call void @test3()
+    ret void
+}
+
+define void @test2() unnamed_addr {
+    call void @test4()
+    call void @test4()
+    ret void
+}
+
+define void @test3() unnamed_addr {
+    call void @dummy()
+    call void @dummy()
+    ret void
+}
+
+define void @test4() unnamed_addr {
+    call void @dummy()
+    call void @dummy()
+    ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/va_arg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/va_arg.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/va_arg.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/va_arg.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,93 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+; RUN: opt -S -mergefunc -mergefunc-use-aliases < %s | FileCheck %s -check-prefix=ALIAS
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; ALIAS: @_Z9simple_vaPKcz = unnamed_addr alias void (i8*, ...), void (i8*, ...)* @_Z10simple_va2PKcz
+; ALIAS-NOT: @_Z9simple_vaPKcz
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+; CHECK-LABEL: define {{.*}}@_Z9simple_vaPKcz
+; CHECK: call void @llvm.va_start
+; CHECK: call void @llvm.va_end
+define dso_local void @_Z9simple_vaPKcz(i8* nocapture readnone, ...) unnamed_addr {
+  %2 = alloca [1 x %struct.__va_list_tag], align 16
+  %3 = bitcast [1 x %struct.__va_list_tag]* %2 to i8*
+  call void @llvm.va_start(i8* nonnull %3)
+  %4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %2, i64 0, i64 0, i32 0
+  %5 = load i32, i32* %4, align 16
+  %6 = icmp ult i32 %5, 41
+  br i1 %6, label %7, label %13
+
+; <label>:7:                                      ; preds = %1
+  %8 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %2, i64 0, i64 0, i32 3
+  %9 = load i8*, i8** %8, align 16
+  %10 = sext i32 %5 to i64
+  %11 = getelementptr i8, i8* %9, i64 %10
+  %12 = add i32 %5, 8
+  store i32 %12, i32* %4, align 16
+  br label %17
+
+; <label>:13:                                     ; preds = %1
+  %14 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %2, i64 0, i64 0, i32 2
+  %15 = load i8*, i8** %14, align 8
+  %16 = getelementptr i8, i8* %15, i64 8
+  store i8* %16, i8** %14, align 8
+  br label %17
+
+; <label>:17:                                     ; preds = %13, %7
+  %18 = phi i8* [ %11, %7 ], [ %15, %13 ]
+  %19 = bitcast i8* %18 to i32*
+  %20 = load i32, i32* %19, align 4
+  call void @_Z6escapei(i32 %20)
+  call void @llvm.va_end(i8* nonnull %3)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*)
+
+; Function Attrs: minsize optsize
+declare dso_local void @_Z6escapei(i32) local_unnamed_addr
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*)
+
+; CHECK-LABEL: define {{.*}}@_Z10simple_va2PKcz
+; CHECK: call void @llvm.va_start
+; CHECK: call void @llvm.va_end
+define dso_local void @_Z10simple_va2PKcz(i8* nocapture readnone, ...) unnamed_addr {
+  %2 = alloca [1 x %struct.__va_list_tag], align 16
+  %3 = bitcast [1 x %struct.__va_list_tag]* %2 to i8*
+  call void @llvm.va_start(i8* nonnull %3)
+  %4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %2, i64 0, i64 0, i32 0
+  %5 = load i32, i32* %4, align 16
+  %6 = icmp ult i32 %5, 41
+  br i1 %6, label %7, label %13
+
+; <label>:7:                                      ; preds = %1
+  %8 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %2, i64 0, i64 0, i32 3
+  %9 = load i8*, i8** %8, align 16
+  %10 = sext i32 %5 to i64
+  %11 = getelementptr i8, i8* %9, i64 %10
+  %12 = add i32 %5, 8
+  store i32 %12, i32* %4, align 16
+  br label %17
+
+; <label>:13:                                     ; preds = %1
+  %14 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %2, i64 0, i64 0, i32 2
+  %15 = load i8*, i8** %14, align 8
+  %16 = getelementptr i8, i8* %15, i64 8
+  store i8* %16, i8** %14, align 8
+  br label %17
+
+; <label>:17:                                     ; preds = %13, %7
+  %18 = phi i8* [ %11, %7 ], [ %15, %13 ]
+  %19 = bitcast i8* %18 to i32*
+  %20 = load i32, i32* %19, align 4
+  call void @_Z6escapei(i32 %20)
+  call void @llvm.va_end(i8* nonnull %3)
+  ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/vector-GEP-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/vector-GEP-crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/vector-GEP-crash.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/vector-GEP-crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,12 @@
+; RUN: opt -mergefunc -disable-output < %s
+; This used to cause a crash when comparing the GEPs
+
+define void @foo(<2 x i64*>) {
+  %tmp = getelementptr i64, <2 x i64*> %0, <2 x i64> <i64 0, i64 0>
+  ret void
+}
+
+define void @bar(<2 x i64*>) {
+  %tmp = getelementptr i64, <2 x i64*> %0, <2 x i64> <i64 0, i64 0>
+  ret void
+}

Added: llvm/trunk/test/Transforms/MergeFunc/vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/vector.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/vector.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/vector.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,72 @@
+; REQUIRES: asserts
+; RUN: opt -mergefunc -stats -disable-output < %s 2>&1 | grep "functions merged"
+
+; This test checks whether we can merge
+;   vector<intptr_t>::push_back(0)
+; and
+;   vector<void *>::push_back(0)
+; .
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%0 = type { i32, void ()* }
+%1 = type { i64, i1 }
+%"class.std::vector" = type { [24 x i8] }
+
+@vi = global %"class.std::vector" zeroinitializer, align 8
+@__dso_handle = external unnamed_addr global i8*
+@vp = global %"class.std::vector" zeroinitializer, align 8
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define linkonce_odr void @_ZNSt6vectorIlSaIlEED1Ev(%"class.std::vector"* nocapture %this) unnamed_addr align 2 {
+entry:
+  %tmp2.i.i = bitcast %"class.std::vector"* %this to i64**
+  %tmp3.i.i = load i64*, i64** %tmp2.i.i, align 8
+  %tobool.i.i.i = icmp eq i64* %tmp3.i.i, null
+  br i1 %tobool.i.i.i, label %_ZNSt6vectorIlSaIlEED2Ev.exit, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %entry
+  %0 = bitcast i64* %tmp3.i.i to i8*
+  tail call void @_ZdlPv(i8* %0) nounwind
+  ret void
+
+_ZNSt6vectorIlSaIlEED2Ev.exit:                    ; preds = %entry
+  ret void
+}
+
+declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
+
+define linkonce_odr void @_ZNSt6vectorIPvSaIS0_EED1Ev(%"class.std::vector"* nocapture %this) unnamed_addr align 2 {
+entry:
+  %tmp2.i.i = bitcast %"class.std::vector"* %this to i8***
+  %tmp3.i.i = load i8**, i8*** %tmp2.i.i, align 8
+  %tobool.i.i.i = icmp eq i8** %tmp3.i.i, null
+  br i1 %tobool.i.i.i, label %_ZNSt6vectorIPvSaIS0_EED2Ev.exit, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %entry
+  %0 = bitcast i8** %tmp3.i.i to i8*
+  tail call void @_ZdlPv(i8* %0) nounwind
+  ret void
+
+_ZNSt6vectorIPvSaIS0_EED2Ev.exit:                 ; preds = %entry
+  ret void
+}
+
+declare void @_Z1fv()
+
+declare void @_ZNSt6vectorIPvSaIS0_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS0_S2_EERKS0_(%"class.std::vector"* nocapture %this, i8** %__position.coerce, i8** nocapture %__x) align 2
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+declare void @_ZSt17__throw_bad_allocv() noreturn
+
+declare noalias i8* @_Znwm(i64)
+
+declare void @_ZNSt6vectorIlSaIlEE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPlS1_EERKl(%"class.std::vector"* nocapture %this, i64* %__position.coerce, i64* nocapture %__x) align 2
+
+declare void @_GLOBAL__I_a()
+
+declare %1 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone

Added: llvm/trunk/test/Transforms/MergeFunc/vectors-and-arrays.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/vectors-and-arrays.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/vectors-and-arrays.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/vectors-and-arrays.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,19 @@
+; REQUIRES: asserts
+; RUN: opt -mergefunc < %s -disable-output -stats | not grep merged
+; This used to crash with an assert.
+
+define <2 x i8> @v1(<2 x i8> %x) {  ; identity on a 2-element i8 vector; differs from @v2 only in element count
+  ret <2 x i8> %x
+}
+
+define <4 x i8> @v2(<4 x i8> %x) {  ; identity on a 4-element i8 vector; differs from @v1 only in element count
+  ret <4 x i8> %x
+}
+
+define [2 x i8] @a1([2 x i8] %x) {  ; identity on a 2-element i8 array; differs from @a2 only in element count
+  ret [2 x i8] %x
+}
+
+define [4 x i8] @a2([4 x i8] %x) {  ; identity on a 4-element i8 array; differs from @a1 only in element count
+  ret [4 x i8] %x
+}

Added: llvm/trunk/test/Transforms/MergeFunc/weak-small.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeFunc/weak-small.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeFunc/weak-small.ll (added)
+++ llvm/trunk/test/Transforms/MergeFunc/weak-small.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+
+; Weak functions too small for merging to be profitable
+
+; CHECK: define weak i32 @foo(i8*, i32)
+; CHECK-NEXT: ret i32 %1
+; CHECK: define weak i32 @bar(i8*, i32)
+; CHECK-NEXT: ret i32 %1
+
+define weak i32 @foo(i8*, i32) #0 {  ; weak linkage; the body only returns the second (unnamed) argument
+    ret i32 %1
+}
+
+define weak i32 @bar(i8*, i32) #0 {  ; same signature and body as @foo; also weak
+    ret i32 %1
+}

Added: llvm/trunk/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -S | FileCheck %s --check-prefix=X86
+
+%"struct.std::pair" = type { i32, i32, i32, i32 }
+
+; X86-LABEL: @opeq1(
+; X86-NEXT:  entry:
+; X86-NEXT:    [[PTR:%.*]] = alloca i32
+; X86-NEXT:    store i32 42, i32* [[PTR]]
+; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
+; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
+; X86-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
+; X86:       opeq1.exit:
+; X86-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
+; X86-NEXT:    ret i1 [[TMP1]]
+
+define zeroext i1 @opeq1(  ; equality of two { i32, i32, i32, i32 } structs, one basic block per field
+
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+
+entry:  ; field 0 of %a against field 0 of %b
+  %ptr = alloca i32
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %1 = load i32, i32* %first1.i, align 4
+  ; Does other work, has no interference, merge block
+  store i32 42, i32* %ptr  ; writes only the local alloca, not %a or %b
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:  ; field 1, reached only when field 0 matched
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp2.i = icmp eq i32 %2, %3
+  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit
+
+land.rhs.i.2:  ; field 2
+  %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2
+  %4 = load i32, i32* %third.i, align 4
+  %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 2
+  %5 = load i32, i32* %third2.i, align 4
+  %cmp3.i = icmp eq i32 %4, %5
+  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit
+
+land.rhs.i.3:  ; field 3, last link of the chain
+  %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3
+  %6 = load i32, i32* %fourth.i, align 4
+  %fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3
+  %7 = load i32, i32* %fourth2.i, align 4
+  %cmp4.i = icmp eq i32 %6, %7
+  br label %opeq1.exit
+
+opeq1.exit:  ; false from any early exit, otherwise the field-3 comparison
+  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i] , [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
+  ret i1 %8
+}

Added: llvm/trunk/test/Transforms/MergeICmps/X86/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/atomic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/atomic.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/atomic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+
+%"struct.std::pair" = type { i32, i32 }
+
+define zeroext i1 @opeq(  ; two-field struct equality where one of the loads is atomic
+; CHECK-LABEL: @opeq(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
+; CHECK-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; CHECK:       land.rhs.i:
+; CHECK-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load atomic i32, i32* [[SECOND_I]] seq_cst, align 4
+; CHECK-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
+; CHECK-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    br label [[OPEQ1_EXIT]]
+; CHECK:       opeq1.exit:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
+; CHECK-NEXT:    ret i1 [[TMP4]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+entry:  ; field 0 of %a against field 0 of %b, plain loads
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %1 = load i32, i32* %first1.i, align 4
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:  ; field 1; the %a side is read atomically
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %2 = load atomic i32, i32* %second.i seq_cst, align 4  ; seq_cst atomic load; the expected output above keeps both blocks and all loads as written
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp3.i = icmp eq i32 %2, %3
+  br label %opeq1.exit
+
+opeq1.exit:  ; false when field 0 differed, otherwise the field-1 comparison
+  %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
+  ret i1 %4
+}

Added: llvm/trunk/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+
+%"struct.std::pair" = type { i32, i32, i32, i32 }
+
+; The entry block is part of the chain. It however can not be merged. We need to make the
+; first comparison block in the chain the new entry block of the function.
+
+define zeroext i1 @opeq1(  ; four-link comparison chain whose entry block compares mismatched field indices
+; CHECK-LABEL: @opeq1(
+; CHECK-NEXT:    br label [[LAND_RHS_I:%.*]]
+; CHECK:       land.rhs.i:
+; CHECK-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 
+; CHECK-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+; CHECK-NEXT:    [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8*
+; CHECK-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8*
+; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+entry:  ; compares field 3 of %a with field 2 of %b (different indices on each side)
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3 ; field 3 of %a
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 2 ; field 2 of %b
+  %1 = load i32, i32* %first1.i, align 4
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:  ; field 0 on both sides
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp3.i = icmp eq i32 %2, %3
+  br i1 %cmp3.i, label %land.rhs.i.2, label %opeq1.exit
+
+land.rhs.i.2:  ; field 1 on both sides, adjacent to the field compared in %land.rhs.i
+  %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 ; field 1 of %a
+  %4 = load i32, i32* %third.i, align 4
+  %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %5 = load i32, i32* %third2.i, align 4
+  %cmp4.i = icmp eq i32 %4, %5
+  br i1 %cmp4.i, label %land.rhs.i.3, label %opeq1.exit
+
+land.rhs.i.3:  ; field 3 on both sides
+  %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3 ; field 3 of %a
+  %6 = load i32, i32* %fourth.i, align 4
+  %fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3
+  %7 = load i32, i32* %fourth2.i, align 4
+  %cmp5.i = icmp eq i32 %6, %7
+  br label %opeq1.exit
+
+opeq1.exit:  ; false from any early exit, otherwise the last comparison
+  %8 = phi i1 [ false, %entry ], [ false,  %land.rhs.i], [ false, %land.rhs.i.2 ], [ %cmp5.i, %land.rhs.i.3 ]
+  ret i1 %8
+}

Added: llvm/trunk/test/Transforms/MergeICmps/X86/gep-used-outside.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/gep-used-outside.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/gep-used-outside.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/gep-used-outside.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+
+%"struct.std::pair" = type { i32, i32 }
+
+; Check that the transformation is avoided when GEP has a use outside of the
+; parent block of the load instruction.
+
+define zeroext i32 @opeq1(  ; two-field comparison whose entry-block GEP is reused in the exit block
+; CHECK-LABEL: @opeq1(
+; CHECK-NOT:    [[MEMCMP:%.*]] = call i32 @memcmp
+
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+entry:  ; field 1 of %a against field 1 of %b
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 ; this GEP is used again in %opeq1.exit
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 ; field 1 of %b
+  %1 = load i32, i32* %first1.i, align 4
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:  ; field 0 of %a against field 0 of %b
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp3.i = icmp eq i32 %2, %3
+  br label %opeq1.exit
+
+opeq1.exit:
+  %4 = phi i1 [ false, %entry ], [ %cmp3.i,  %land.rhs.i]
+  %5 = load i32, i32* %first.i, align 4  ; use of the entry block's GEP outside that block
+  %6 = select i1 %4, i32 %5, i32 0  ; reloaded field 1 of %a when both fields matched, else 0
+  ret i32 %6
+}

Added: llvm/trunk/test/Transforms/MergeICmps/X86/int64-and-ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/int64-and-ptr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/int64-and-ptr.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/int64-and-ptr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -S | FileCheck %s --check-prefix=X86
+
+; 8-byte int and 8-byte pointer should merge into a 16-byte memcmp.
+; X86: memcmp(i8* {{.*}}, i8* {{.*}}, i64 16)
+
+%struct.outer = type { i64, %struct.inner* }
+%struct.inner = type { i32, i32, i32 }
+
+; Function Attrs: nounwind uwtable
+define dso_local i1 @"?foo@@YAHAEAUouter@@0@Z"(%struct.outer* align 8 dereferenceable(16) %o1, %struct.outer* align 8 dereferenceable(116) %o2) local_unnamed_addr #0 { ; MSVC-decorated foo taking two outer references; NOTE(review): dereferenceable(116) on %o2 vs 16 on %o1 - confirm intended
+entry:  ; compare the leading i64 member of both structs
+  %p1 = getelementptr inbounds %struct.outer, %struct.outer* %o1, i64 0, i32 0
+  %0 = load i64, i64* %p1, align 8
+  %p11 = getelementptr inbounds %struct.outer, %struct.outer* %o2, i64 0, i32 0
+  %1 = load i64, i64* %p11, align 8
+  %cmp = icmp eq i64 %0, %1
+  br i1 %cmp, label %if.then, label %if.end5
+
+if.then:                                          ; preds = %entry
+  %p2 = getelementptr inbounds %struct.outer, %struct.outer* %o1, i64 0, i32 1
+  %2 = load %struct.inner*, %struct.inner** %p2, align 8  ; second member is a pointer, loaded with 8-byte alignment
+  %p22 = getelementptr inbounds %struct.outer, %struct.outer* %o2, i64 0, i32 1
+  %3 = load %struct.inner*, %struct.inner** %p22, align 8
+  %cmp3 = icmp eq %struct.inner* %2, %3  ; pointer equality, not a comparison of the pointees
+  br label %if.end5
+
+if.end5:                                          ; preds = %if.then, %entry
+  %rez.0 = phi i1 [ %cmp3, %if.then ], [ false, %entry ]  ; false when the i64 members differed
+  ret i1 %rez.0
+}
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }

Added: llvm/trunk/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+
+%"struct.std::pair" = type { i32, i32, i32 }
+
+; Last block does not produce the non-constant value into the phi.
+; We could handle this case, but an easier way would be to allow other transformations such as
+; SimplifyCFG to remove %land.rhs.i.2 and turn the terminator of %land.rhs.i into an unconditional
+; branch.
+
+define zeroext i1 @opeq1(  ; comparison chain whose final block holds no comparison of its own
+; CHECK-LABEL: @opeq1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
+; CHECK-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; CHECK:       land.rhs.i:
+; CHECK-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
+; CHECK-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
+; CHECK-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    br i1 [[CMP3_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; CHECK:       land.rhs.i.2:
+; CHECK-NEXT:    br label [[OPEQ1_EXIT]]
+; CHECK:       opeq1.exit:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
+; CHECK-NEXT:    ret i1 [[TMP4]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(12) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(12) %b) local_unnamed_addr #0 {
+entry:  ; field 0 of %a against field 0 of %b
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %1 = load i32, i32* %first1.i, align 4
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:  ; field 1 of %a against field 1 of %b
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp3.i = icmp eq i32 %2, %3
+  br i1 %cmp3.i, label %land.rhs.i.2, label %opeq1.exit
+
+land.rhs.i.2:  ; holds only the branch below; the phi value for this edge is computed in %land.rhs.i
+  br label %opeq1.exit
+
+opeq1.exit:  ; the non-constant incoming value %cmp3.i arrives via %land.rhs.i.2
+  %4 = phi i1 [ false, %entry ], [ false,  %land.rhs.i], [ %cmp3.i, %land.rhs.i.2 ]
+  ret i1 %4
+}

Added: llvm/trunk/test/Transforms/MergeICmps/X86/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
+
+%"struct.std::pair" = type { i32, i32, i32, i32 }
+
+declare void @foo(...)
+
+; We can discard %entry and %land.rhs.i, but still merge the last 2 blocks.
+define zeroext i1 @opeq1(  ; four-link comparison chain; the first two links also call @foo
+; X86-LABEL: @opeq1(
+; X86:      land.rhs.i.2:
+; X86-NEXT:    [[THIRD_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 2 
+; X86-NEXT:    [[THIRD1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 2
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[THIRD_I]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[THIRD1_I]] to i8*
+; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
+; X86-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
+; X86:       opeq1.exit:
+; X86-NEXT:    [[TMP1:%.*]] = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ [[TMP0]], %land.rhs.i.2 ] 
+; X86-NEXT:    ret i1 [[TMP1]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+entry:  ; field 0 of %a against field 0 of %b, plus a call to @foo
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %1 = load i32, i32* %first1.i, align 4
+  ; Does other work.
+  call void (...) @foo()  ; call to an external variadic function with no attributes declared
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:  ; field 1 of %a against field 1 of %b, plus a call to @foo
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  ; Does other work.
+  call void (...) @foo()
+  %cmp2.i = icmp eq i32 %2, %3
+  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit
+
+land.rhs.i.2:  ; field 2; contains only the GEPs, loads and compare
+  %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2 ; field 2 of %a
+  %4 = load i32, i32* %third.i, align 4
+  %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 2
+  %5 = load i32, i32* %third2.i, align 4
+  %cmp3.i = icmp eq i32 %4, %5
+  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit
+
+land.rhs.i.3:  ; field 3; contains only the GEPs, loads and compare
+  %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3
+  %6 = load i32, i32* %fourth.i, align 4
+  %fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3
+  %7 = load i32, i32* %fourth2.i, align 4
+  %cmp4.i = icmp eq i32 %6, %7
+  br label %opeq1.exit
+
+opeq1.exit:  ; false from any early exit, otherwise the field-3 comparison
+  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i] , [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
+  ret i1 %8
+}

Added: llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,131 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S -disable-simplify-libcalls | FileCheck %s --check-prefix=X86-NOBUILTIN
+
+%"struct.std::pair" = type { i32, i32 }
+
+define zeroext i1 @opeq1(  ; equality of two { i32, i32 } structs: field 0 in %entry, field 1 in %land.rhs.i
+; X86-LABEL: @opeq1(
+; X86-NEXT:  entry:
+; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
+; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
+; X86-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
+; X86:       opeq1.exit:
+; X86-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
+; X86-NEXT:    ret i1 [[TMP1]]
+;
+; X86-NOBUILTIN-LABEL: @opeq1(
+; X86-NOBUILTIN-NEXT:  entry:
+; X86-NOBUILTIN-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; X86-NOBUILTIN-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
+; X86-NOBUILTIN-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; X86-NOBUILTIN-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
+; X86-NOBUILTIN-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; X86-NOBUILTIN-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; X86-NOBUILTIN:       land.rhs.i:
+; X86-NOBUILTIN-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
+; X86-NOBUILTIN-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
+; X86-NOBUILTIN-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
+; X86-NOBUILTIN-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
+; X86-NOBUILTIN-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; X86-NOBUILTIN-NEXT:    br label [[OPEQ1_EXIT]]
+; X86-NOBUILTIN:       opeq1.exit:
+; X86-NOBUILTIN-NEXT:    [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
+; X86-NOBUILTIN-NEXT:    ret i1 [[TMP4]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+entry:  ; field 0 of %a against field 0 of %b
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %1 = load i32, i32* %first1.i, align 4
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:  ; field 1 of %a against field 1 of %b
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp3.i = icmp eq i32 %2, %3
+  br label %opeq1.exit
+
+opeq1.exit:  ; false when field 0 differed, otherwise the field-1 comparison
+  %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
+  ret i1 %4
+; The entry block with zero-offset GEPs is kept, loads are removed.
+; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
+; The branch is now a direct branch; the other block has been removed.
+; The phi is updated.
+}
+
+; Same as above, but the two blocks are in inverse order.
+define zeroext i1 @opeq1_inverse(  ; same two-field equality, but field 1 is tested in %entry and field 0 in %land.rhs.i
+; X86-LABEL: @opeq1_inverse(
+; X86-NEXT:    br label [[LAND_RHS_I:%.*]]
+; X86:       land.rhs.i:
+; X86-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8*
+; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
+; X86-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
+; X86:       opeq1.exit:
+; X86-NEXT:    [[TMP2:%.*]] = phi i1 [ [[TMP1]], [[LAND_RHS_I]] ]
+; X86-NEXT:    ret i1 [[TMP2]]
+;
+; X86-NOBUILTIN-LABEL: @opeq1_inverse(
+; X86-NOBUILTIN-NEXT:  entry:
+; X86-NOBUILTIN-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 1
+; X86-NOBUILTIN-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
+; X86-NOBUILTIN-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 1
+; X86-NOBUILTIN-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
+; X86-NOBUILTIN-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; X86-NOBUILTIN-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; X86-NOBUILTIN:       land.rhs.i:
+; X86-NOBUILTIN-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 0
+; X86-NOBUILTIN-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
+; X86-NOBUILTIN-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 0
+; X86-NOBUILTIN-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
+; X86-NOBUILTIN-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; X86-NOBUILTIN-NEXT:    br label [[OPEQ1_EXIT]]
+; X86-NOBUILTIN:       opeq1.exit:
+; X86-NOBUILTIN-NEXT:    [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
+; X86-NOBUILTIN-NEXT:    ret i1 [[TMP4]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+entry:  ; field 1 of %a against field 1 of %b
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %1 = load i32, i32* %first1.i, align 4
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:  ; field 0 of %a against field 0 of %b
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp3.i = icmp eq i32 %2, %3
+  br label %opeq1.exit
+
+opeq1.exit:  ; false when field 1 differed, otherwise the field-0 comparison
+  %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
+  ret i1 %4
+; The second block with zero-offset GEPs is kept, loads are removed.
+; CHECK: land.rhs.i
+; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
+; The branch is now a direct branch; the other block has been removed.
+; The phi is updated.
+}
+
+
+

Added: llvm/trunk/test/Transforms/MergeICmps/X86/pr36557.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/pr36557.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/pr36557.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/pr36557.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
+
+source_filename = "qabstractitemmodeltester.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--linux-gnu"
+
+%class.B = type { i32, i32 }
+%class.D = type { i32 }
+%class.C = type { i8 }
+%class.QMessageLogger = type { i8 }
+
+$_ZN1D7compareI1BS1_EEbRKT_RKT0_PKcS9_S9_i = comdat any
+
+@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+; Function Attrs: uwtable
+define linkonce_odr dso_local zeroext i1 @_ZN1D7compareI1BS1_EEbRKT_RKT0_PKcS9_S9_i(%class.D* %this, %class.B* dereferenceable(8) %p1, %class.B* dereferenceable(8) %p2, i8*, i8*, i8*, i32) local_unnamed_addr #0 comdat align 2 {
+; X86-LABEL: @_ZN1D7compareI1BS1_EEbRKT_RKT0_PKcS9_S9_i(
+; X86-NEXT:  entry:
+; X86-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_C:%.*]], align 1
+; X86-NEXT:    [[C_I:%.*]] = getelementptr inbounds [[CLASS_B:%.*]], %class.B* [[P2:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP4:%.*]] = load i32, i32* [[C_I]], align 4
+; X86-NEXT:    [[C2_I:%.*]] = getelementptr inbounds [[CLASS_B]], %class.B* [[P1:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP5:%.*]] = load i32, i32* [[C2_I]], align 4
+; X86-NEXT:    [[CMP_ENTRY:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X86-NEXT:    br i1 [[CMP_ENTRY]], label [[BB1:%.*]], label [[BB2:%.*]]
+; X86:       bb1:
+; X86-NEXT:    [[M_I:%.*]] = getelementptr inbounds [[CLASS_B]], %class.B* [[P2]], i64 0, i32 1
+; X86-NEXT:    [[TMP6:%.*]] = load i32, i32* [[M_I]], align 4
+; X86-NEXT:    [[M3_I:%.*]] = getelementptr inbounds [[CLASS_B]], %class.B* [[P1]], i64 0, i32 1
+; X86-NEXT:    [[TMP7:%.*]] = load i32, i32* [[M3_I]], align 4
+; X86-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
+; X86-NEXT:    br label [[BB2]]
+; X86:       bb2:
+; X86-NEXT:    [[TMP8:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP1]], [[BB1]] ]
+; X86-NEXT:    [[FAILUREREPORTINGMODE:%.*]] = getelementptr inbounds [[CLASS_D:%.*]], %class.D* [[THIS:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP9:%.*]] = load i32, i32* [[FAILUREREPORTINGMODE]], align 4
+; X86-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP9]], 0
+; X86-NEXT:    br i1 [[COND]], label [[BB3:%.*]], label [[SW_EPILOG:%.*]]
+; X86:       bb3:
+; X86-NEXT:    br i1 [[CMP_ENTRY]], label [[BB4:%.*]], label [[BB5:%.*]]
+; X86:       bb4:
+; X86-NEXT:    [[M_I_I:%.*]] = getelementptr inbounds [[CLASS_B]], %class.B* [[P2]], i64 0, i32 1
+; X86-NEXT:    [[TMP10:%.*]] = load i32, i32* [[M_I_I]], align 4
+; X86-NEXT:    [[M3_I_I:%.*]] = getelementptr inbounds [[CLASS_B]], %class.B* [[P1]], i64 0, i32 1
+; X86-NEXT:    [[TMP11:%.*]] = load i32, i32* [[M3_I_I]], align 4
+; X86-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[TMP10]], [[TMP11]]
+; X86-NEXT:    br label [[BB5]]
+; X86:       bb5:
+; X86-NEXT:    [[TMP12:%.*]] = phi i1 [ false, [[BB3]] ], [ [[CMP4]], [[BB4]] ]
+; X86-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[CLASS_C]], %class.C* [[REF_TMP]], i64 0, i32 0
+; X86-NEXT:    br label [[SW_EPILOG]]
+; X86:       sw.epilog:
+; X86-NEXT:    ret i1 [[TMP8]]
+;
+entry:
+  %ref.tmp = alloca %class.C, align 1
+  %c.i = getelementptr inbounds %class.B, %class.B* %p2, i64 0, i32 0
+  %4 = load i32, i32* %c.i, align 4
+  %c2.i = getelementptr inbounds %class.B, %class.B* %p1, i64 0, i32 0
+  %5 = load i32, i32* %c2.i, align 4
+  %cmp_entry = icmp eq i32 %4, %5
+  br i1 %cmp_entry, label %bb1, label %bb2
+
+bb1:                                       ; preds = %entry
+  %m.i = getelementptr inbounds %class.B, %class.B* %p2, i64 0, i32 1
+  %6 = load i32, i32* %m.i, align 4
+  %m3.i = getelementptr inbounds %class.B, %class.B* %p1, i64 0, i32 1
+  %7 = load i32, i32* %m3.i, align 4
+  %cmp1 = icmp eq i32 %6, %7
+  br label %bb2
+
+bb2:                               ; preds = %entry, %bb1
+  %8 = phi i1 [ false, %entry ], [ %cmp1, %bb1 ]
+  %failureReportingMode = getelementptr inbounds %class.D, %class.D* %this, i64 0, i32 0
+  %9 = load i32, i32* %failureReportingMode, align 4
+  %cond = icmp eq i32 %9, 0
+  br i1 %cond, label %bb3, label %sw.epilog
+
+bb3:                                            ; preds = %bb2
+  br i1 %cmp_entry, label %bb4, label %bb5
+
+bb4:                                     ; preds = %bb3
+  %m.i.i = getelementptr inbounds %class.B, %class.B* %p2, i64 0, i32 1
+  %10 = load i32, i32* %m.i.i, align 4
+  %m3.i.i = getelementptr inbounds %class.B, %class.B* %p1, i64 0, i32 1
+  %11 = load i32, i32* %m3.i.i, align 4
+  %cmp4 = icmp eq i32 %10, %11
+  br label %bb5
+
+bb5:                          ; preds = %bb3, %bb4
+  %12 = phi i1 [ false, %bb3 ], [ %cmp4, %bb4 ]
+  %13 = getelementptr inbounds %class.C, %class.C* %ref.tmp, i64 0, i32 0
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %bb5, %bb2
+  ret i1 %8
+}
+

Added: llvm/trunk/test/Transforms/MergeICmps/X86/split-block-does-work.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/split-block-does-work.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/split-block-does-work.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/split-block-does-work.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
+
+%"struct.std::pair" = type { i32, i32, i32, i32 }
+
+declare void @foo(...)  nounwind readnone
+
+; We can split %entry and create a memcmp(16 bytes).
+define zeroext i1 @opeq1(
+; X86-LABEL: @opeq1(
+;
+; Make sure this call is moved to the beginning of the entry block.
+; X86:      entry:
+; X86-NEXT:    call void (...) @foo()
+; X86-NEXT:    [[THIRD_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[THIRD1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[CSTR:%.*]] = bitcast i32* [[THIRD_I]] to i8*
+; X86-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[THIRD1_I]] to i8*
+; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16)
+; X86-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+entry:
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %1 = load i32, i32* %first1.i, align 4
+  ; Does other work.
+  call void (...) @foo()
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp2.i = icmp eq i32 %2, %3
+  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit
+
+land.rhs.i.2:
+  %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2
+  %4 = load i32, i32* %third.i, align 4
+  %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 2
+  %5 = load i32, i32* %third2.i, align 4
+  %cmp3.i = icmp eq i32 %4, %5
+  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit
+
+land.rhs.i.3:
+  %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3
+  %6 = load i32, i32* %fourth.i, align 4
+  %fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3
+  %7 = load i32, i32* %fourth2.i, align 4
+  %cmp4.i = icmp eq i32 %6, %7
+  br label %opeq1.exit
+
+opeq1.exit:
+  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i] , [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
+  ret i1 %8
+}
+
+
+; We will not be able to merge anything, make sure the call is not moved out.
+define zeroext i1 @opeq1_discontiguous(
+; X86-LABEL: @opeq1_discontiguous(
+;
+; Make sure this call stays in place in the entry block (after the GEPs).
+; X86:      entry:
+; X86:        [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 1 
+; X86:        [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; X86:        call void (...) @foo()
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 {
+entry:
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 
+  %1 = load i32, i32* %first1.i, align 4
+  ; Does other work.
+  call void (...) @foo()
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2 
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp2.i = icmp eq i32 %2, %3
+  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit
+
+land.rhs.i.2:
+  %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2
+  %4 = load i32, i32* %third.i, align 4
+  %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3
+  %5 = load i32, i32* %third2.i, align 4
+  %cmp3.i = icmp eq i32 %4, %5
+  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit
+
+land.rhs.i.3:
+  %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %6 = load i32, i32* %fourth.i, align 4
+  %fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3 
+  %7 = load i32, i32* %fourth2.i, align 4
+  %cmp4.i = icmp eq i32 %6, %7
+  br label %opeq1.exit
+
+opeq1.exit:
+  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i] , [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
+  ret i1 %8
+}

Added: llvm/trunk/test/Transforms/MergeICmps/X86/tuple-four-int8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/tuple-four-int8.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/tuple-four-int8.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/tuple-four-int8.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,81 @@
+; XFAIL: *
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+
+; This is a more involved test: clang generates this weird pattern for
+; tuple<uint8_t, uint8_t, uint8_t, uint8_t>. Right now we skip the entry block
+; (which defines the base pointer for other blocks) and the last one (which
+; does not have the expected structure). Only middle blocks (bytes [1,2]) are
+; merged.
+
+%"class.std::tuple" = type { %"struct.std::_Tuple_impl" }
+%"struct.std::_Tuple_impl" = type { %"struct.std::_Tuple_impl.0", %"struct.std::_Head_base.6" }
+%"struct.std::_Tuple_impl.0" = type { %"struct.std::_Tuple_impl.1", %"struct.std::_Head_base.5" }
+%"struct.std::_Tuple_impl.1" = type { %"struct.std::_Tuple_impl.2", %"struct.std::_Head_base.4" }
+%"struct.std::_Tuple_impl.2" = type { %"struct.std::_Head_base" }
+%"struct.std::_Head_base" = type { i8 }
+%"struct.std::_Head_base.4" = type { i8 }
+%"struct.std::_Head_base.5" = type { i8 }
+%"struct.std::_Head_base.6" = type { i8 }
+
+define zeroext i1 @opeq(
+; CHECK-LABEL: @opeq(
+;
+; These 2 instructions are split. Then we can merge 3 bytes, instead of 2.
+; CHECK:         br label [[LAND_ELEM0:%.*]]
+; CHECK:       land.elem1:
+; CHECK-NEXT:    [[A_ELEM1_ADDR:%.*]] = getelementptr inbounds i8, i8* %a.base, i64 1
+; CHECK-NEXT:    [[B_ELEM1_ADDR:%.*]] = getelementptr inbounds i8, i8* %b.base, i64 1
+; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[A_ELEM1_ADDR]], i8* [[B_ELEM1_ADDR]], i64 3)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT:    br label [[OPEQ_EXIT:%.*]]
+; CHECK:       land.elem0:
+; CHECK:         [[A_BASE:%.*]] = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* [[A:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
+; CHECK:         [[B_BASE:%.*]] = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* [[B:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, i8* [[A_BASE]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[B_BASE]], align 1
+; CHECK-NEXT:    [[CMP_ELEM0:%.*]] = icmp eq i8 [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[CMP_ELEM0]], label [[LAND_ELEM1:%.*]], label [[OPEQ_EXIT]]
+; CHECK:       opeq.exit:
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ [[CMP_ELEM0]], [[LAND_ELEM0]] ], [ [[TMP2]], [[LAND_ELEM1]] ]
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %"class.std::tuple"* nocapture readonly dereferenceable(4) %a,
+  %"class.std::tuple"* nocapture readonly dereferenceable(4) %b) local_unnamed_addr #1 {
+entry:
+  %a.base = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %a, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %a.elem3.addr = getelementptr inbounds i8, i8* %a.base, i64 3
+  %0 = load i8, i8* %a.elem3.addr, align 1
+  %b.base = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %b, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %b.elem3.addr = getelementptr inbounds i8, i8* %b.base, i64 3
+  %1 = load i8, i8* %b.elem3.addr, align 1
+  %cmp.elem3 = icmp eq i8 %0, %1
+  br i1 %cmp.elem3, label %land.elem2, label %opeq.exit
+
+land.elem2:
+  %a.elem2.addr = getelementptr inbounds i8, i8* %a.base, i64 2
+  %2 = load i8, i8* %a.elem2.addr, align 1
+  %b.elem2.addr = getelementptr inbounds i8, i8* %b.base, i64 2
+  %3 = load i8, i8* %b.elem2.addr, align 1
+  %cmp.elem2 = icmp eq i8 %2, %3
+  br i1 %cmp.elem2, label %land.elem1, label %opeq.exit
+
+land.elem1:
+  %a.elem1.addr = getelementptr inbounds i8, i8* %a.base, i64 1
+  %4 = load i8, i8* %a.elem1.addr, align 1
+  %b.elem1.addr = getelementptr inbounds i8, i8* %b.base, i64 1
+  %5 = load i8, i8* %b.elem1.addr, align 1
+  %cmp.elem1 = icmp eq i8 %4, %5
+  br i1 %cmp.elem1, label %land.elem0, label %opeq.exit
+
+land.elem0:
+  %6 = load i8, i8* %a.base, align 1
+  %7 = load i8, i8* %b.base, align 1
+  %cmp.elem0 = icmp eq i8 %6, %7
+  br label %opeq.exit
+
+opeq.exit:
+  %8 = phi i1 [ false, %entry ], [ false, %land.elem2 ], [ false, %land.elem1 ], [ %cmp.elem0, %land.elem0 ]
+  ret i1 %8
+}
+

Added: llvm/trunk/test/Transforms/MergeICmps/X86/two-complex-bb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/two-complex-bb.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/two-complex-bb.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/two-complex-bb.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
+
+%"struct.std::pair" = type { i32, i32 }
+
+; This tests a function with two complex basic blocks.
+define zeroext i1 @twocomplexblocks(
+; X86-LABEL: @twocomplexblocks(
+; X86-NEXT:  entry:
+; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
+; X86-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; X86-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
+; X86-NEXT:    [[EXTRAWORK:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; X86-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; X86-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; X86:       land.rhs.i:
+; X86-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
+; X86-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
+; X86-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
+; X86-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
+; X86-NEXT:    [[EXTRAWORK2:%.*]] = add i32 [[TMP2]], [[TMP3]]
+; X86-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; X86-NEXT:    br label [[OPEQ1_EXIT]]
+; X86:       opeq1.exit:
+; X86-NEXT:    [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
+; X86-NEXT:    ret i1 [[TMP4]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+entry:
+  ; This is a complex BCE Basic Block.
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %1 = load i32, i32* %first1.i, align 4
+  %extrawork = add i32 %0, %1
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:
+  ; This is a complex BCE Basic Block.
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  %extrawork2 = add i32 %2, %3
+  %cmp3.i = icmp eq i32 %2, %3
+  br label %opeq1.exit
+
+opeq1.exit:
+  %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
+  ret i1 %4
+}
+
+
+
+

Added: llvm/trunk/test/Transforms/MergeICmps/X86/volatile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/X86/volatile.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/X86/volatile.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/X86/volatile.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
+
+%"struct.std::pair" = type { i32, i32 }
+
+define zeroext i1 @opeq(
+; CHECK-LABEL: @opeq(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
+; CHECK-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; CHECK:       land.rhs.i:
+; CHECK-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load volatile i32, i32* [[SECOND_I]], align 4
+; CHECK-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
+; CHECK-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    br label [[OPEQ1_EXIT]]
+; CHECK:       opeq1.exit:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
+; CHECK-NEXT:    ret i1 [[TMP4]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+entry:
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %1 = load i32, i32* %first1.i, align 4
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %2 = load volatile i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp3.i = icmp eq i32 %2, %3
+  br label %opeq1.exit
+
+opeq1.exit:
+  %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
+  ret i1 %4
+}
+

Added: llvm/trunk/test/Transforms/MergeICmps/pair-int32-int32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MergeICmps/pair-int32-int32.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MergeICmps/pair-int32-int32.ll (added)
+++ llvm/trunk/test/Transforms/MergeICmps/pair-int32-int32.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mergeicmps -S | FileCheck %s --check-prefix=NOEXPANSION
+
+%"struct.std::pair" = type { i32, i32 }
+
+define zeroext i1 @opeq1(
+; NOEXPANSION-LABEL: @opeq1(
+; NOEXPANSION-NEXT:  entry:
+; NOEXPANSION-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
+; NOEXPANSION-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
+; NOEXPANSION-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
+; NOEXPANSION-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
+; NOEXPANSION-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; NOEXPANSION-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; NOEXPANSION:       land.rhs.i:
+; NOEXPANSION-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
+; NOEXPANSION-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
+; NOEXPANSION-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
+; NOEXPANSION-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
+; NOEXPANSION-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; NOEXPANSION-NEXT:    br label [[OPEQ1_EXIT]]
+; NOEXPANSION:       opeq1.exit:
+; NOEXPANSION-NEXT:    [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
+; NOEXPANSION-NEXT:    ret i1 [[TMP4]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+entry:
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %1 = load i32, i32* %first1.i, align 4
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp3.i = icmp eq i32 %2, %3
+  br label %opeq1.exit
+
+opeq1.exit:
+  %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
+  ret i1 %4
+}
+
+; Same as above, but the two blocks are in inverse order.
+define zeroext i1 @opeq1_inverse(
+; NOEXPANSION-LABEL: @opeq1_inverse(
+; NOEXPANSION-NEXT:  entry:
+; NOEXPANSION-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 1
+; NOEXPANSION-NEXT:    [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
+; NOEXPANSION-NEXT:    [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 1
+; NOEXPANSION-NEXT:    [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
+; NOEXPANSION-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; NOEXPANSION-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
+; NOEXPANSION:       land.rhs.i:
+; NOEXPANSION-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 0
+; NOEXPANSION-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
+; NOEXPANSION-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 0
+; NOEXPANSION-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
+; NOEXPANSION-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; NOEXPANSION-NEXT:    br label [[OPEQ1_EXIT]]
+; NOEXPANSION:       opeq1.exit:
+; NOEXPANSION-NEXT:    [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
+; NOEXPANSION-NEXT:    ret i1 [[TMP4]]
+;
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
+  %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
+entry:
+  %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
+  %0 = load i32, i32* %first.i, align 4
+  %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
+  %1 = load i32, i32* %first1.i, align 4
+  %cmp.i = icmp eq i32 %0, %1
+  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
+
+land.rhs.i:
+  %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
+  %2 = load i32, i32* %second.i, align 4
+  %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
+  %3 = load i32, i32* %second2.i, align 4
+  %cmp3.i = icmp eq i32 %2, %3
+  br label %opeq1.exit
+
+opeq1.exit:
+  %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
+  ret i1 %4
+}
+
+
+

Added: llvm/trunk/test/Transforms/MetaRenamer/main.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MetaRenamer/main.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MetaRenamer/main.ll (added)
+++ llvm/trunk/test/Transforms/MetaRenamer/main.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; Make sure @main is left untouched.
+; RUN: opt -metarenamer -S %s | FileCheck %s
+
+; CHECK: define void @main
+; CHECK: call void @main
+
+define void @main() {
+  call void @patatino()
+  ret void
+}
+
+define void @patatino() {
+  call void @main()
+  ret void
+}

Added: llvm/trunk/test/Transforms/MetaRenamer/metarenamer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MetaRenamer/metarenamer.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MetaRenamer/metarenamer.ll (added)
+++ llvm/trunk/test/Transforms/MetaRenamer/metarenamer.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,113 @@
+; RUN: opt -metarenamer -S < %s | FileCheck %s
+
+; CHECK: target triple {{.*}}
+; CHECK-NOT: {{^x*}}xxx{{^x*}}
+; CHECK: ret i32 6
+
+target triple = "x86_64-pc-linux-gnu"
+
+%struct.bar_xxx = type { i32, double }
+%struct.foo_xxx = type { i32, float, %struct.bar_xxx }
+
+@func_5_xxx.static_local_3_xxx = internal global i32 3, align 4
+@global_3_xxx = common global i32 0, align 4
+
+@func_7_xxx = weak alias i32 (...), i32 (...)* @aliased_func_7_xxx
+
+define i32 @aliased_func_7_xxx(...) {
+  ret i32 0
+}
+
+define i32 @func_3_xxx() nounwind uwtable ssp {
+  ret i32 3
+}
+
+define void @func_4_xxx(%struct.foo_xxx* sret %agg.result) nounwind uwtable ssp {
+  %1 = alloca %struct.foo_xxx, align 8
+  %2 = getelementptr inbounds %struct.foo_xxx, %struct.foo_xxx* %1, i32 0, i32 0
+  store i32 1, i32* %2, align 4
+  %3 = getelementptr inbounds %struct.foo_xxx, %struct.foo_xxx* %1, i32 0, i32 1
+  store float 2.000000e+00, float* %3, align 4
+  %4 = getelementptr inbounds %struct.foo_xxx, %struct.foo_xxx* %1, i32 0, i32 2
+  %5 = getelementptr inbounds %struct.bar_xxx, %struct.bar_xxx* %4, i32 0, i32 0
+  store i32 3, i32* %5, align 4
+  %6 = getelementptr inbounds %struct.bar_xxx, %struct.bar_xxx* %4, i32 0, i32 1
+  store double 4.000000e+00, double* %6, align 8
+  %7 = bitcast %struct.foo_xxx* %agg.result to i8*
+  %8 = bitcast %struct.foo_xxx* %1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %7, i8* align 8 %8, i64 24, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define i32 @func_5_xxx(i32 %arg_1_xxx, i32 %arg_2_xxx, i32 %arg_3_xxx, i32 %arg_4_xxx) nounwind uwtable ssp {
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  %local_1_xxx = alloca i32, align 4
+  %local_2_xxx = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %arg_1_xxx, i32* %1, align 4
+  store i32 %arg_2_xxx, i32* %2, align 4
+  store i32 %arg_3_xxx, i32* %3, align 4
+  store i32 %arg_4_xxx, i32* %4, align 4
+  store i32 1, i32* %local_1_xxx, align 4
+  store i32 2, i32* %local_2_xxx, align 4
+  store i32 0, i32* %i, align 4
+  br label %5
+
+; <label>:5                                       ; preds = %9, %0
+  %6 = load i32, i32* %i, align 4
+  %7 = icmp slt i32 %6, 10
+  br i1 %7, label %8, label %12
+
+; <label>:8                                       ; preds = %5
+  br label %9
+
+; <label>:9                                       ; preds = %8
+  %10 = load i32, i32* %i, align 4
+  %11 = add nsw i32 %10, 1
+  store i32 %11, i32* %i, align 4
+  br label %5
+
+; <label>:12                                      ; preds = %5
+  %13 = load i32, i32* %local_1_xxx, align 4
+  %14 = load i32, i32* %1, align 4
+  %15 = add nsw i32 %13, %14
+  %16 = load i32, i32* %local_2_xxx, align 4
+  %17 = add nsw i32 %15, %16
+  %18 = load i32, i32* %2, align 4
+  %19 = add nsw i32 %17, %18
+  %20 = load i32, i32* @func_5_xxx.static_local_3_xxx, align 4
+  %21 = add nsw i32 %19, %20
+  %22 = load i32, i32* %3, align 4
+  %23 = add nsw i32 %21, %22
+  %24 = load i32, i32* %4, align 4
+  %25 = add nsw i32 %23, %24
+  ret i32 %25
+}
+
+define i32 @varargs_func_6_xxx(i32 %arg_1_xxx, i32 %arg_2_xxx, ...) nounwind uwtable ssp {
+  %1 = alloca i32, align 4
+  %2 = alloca i32, align 4
+  store i32 %arg_1_xxx, i32* %1, align 4
+  store i32 %arg_2_xxx, i32* %2, align 4
+  ret i32 6
+}
+
+declare noalias i8* @malloc(i32)
+declare void @free(i8* nocapture)
+
+define void @dont_rename_lib_funcs() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = call i8* @malloc(i32 23)
+; CHECK-NEXT:    call void @free(i8* [[TMP]])
+; CHECK-NEXT:    ret void
+;
+  %x = call i8* @malloc(i32 23)
+  call void @free(i8* %x)
+  ret void
+}

Added: llvm/trunk/test/Transforms/NameAnonGlobals/rename.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NameAnonGlobals/rename.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NameAnonGlobals/rename.ll (added)
+++ llvm/trunk/test/Transforms/NameAnonGlobals/rename.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt -S -name-anon-globals < %s | FileCheck %s
+; RUN: opt -prepare-for-thinlto -O0 -module-summary -o %t.bc < %s
+
+
+; foo contributes to the unique hash for the module
+define void @foo() {
+    ret void
+}
+
+; bar is internal, and does not contribute to the unique hash for the module
+define internal void @bar() {
+    ret void
+}
+
+; CHECK: @anon.acbd18db4cc2f85cedef654fccc4a4d8.3 = global i8 0
+; CHECK: @anon.acbd18db4cc2f85cedef654fccc4a4d8.4 = alias i8, i8* @anon.acbd18db4cc2f85cedef654fccc4a4d8.3
+; CHECK: define void @anon.acbd18db4cc2f85cedef654fccc4a4d8.0()
+; CHECK: define void @anon.acbd18db4cc2f85cedef654fccc4a4d8.1()
+; CHECK: define void @anon.acbd18db4cc2f85cedef654fccc4a4d8.2()
+
+define void @0() {
+    ret void
+}
+define void @1() {
+    ret void
+}
+define void @2() {
+    ret void
+}
+
+
+@3 = global i8 0
+
+@4 = alias i8, i8 *@3

Added: llvm/trunk/test/Transforms/NaryReassociate/NVPTX/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NaryReassociate/NVPTX/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NaryReassociate/NVPTX/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/NaryReassociate/NVPTX/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+    config.unsupported = True

Added: llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll (added)
+++ llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,144 @@
+; RUN: opt < %s -nary-reassociate -early-cse -S | FileCheck %s
+; RUN: opt < %s -passes='nary-reassociate' -S | opt -early-cse -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+declare void @foo(float*)
+
+; foo(&a[i]);
+; foo(&a[i + j]);
+;   =>
+; t = &a[i];
+; foo(t);
+; foo(t + j);
+define void @reassociate_gep(float* %a, i64 %i, i64 %j) {
+; CHECK-LABEL: @reassociate_gep(
+  %1 = add i64 %i, %j
+  %2 = getelementptr float, float* %a, i64 %i
+; CHECK: [[t1:[^ ]+]] = getelementptr float, float* %a, i64 %i
+  call void @foo(float* %2)
+; CHECK: call void @foo(float* [[t1]])
+  %3 = getelementptr float, float* %a, i64 %1
+; CHECK: [[t2:[^ ]+]] = getelementptr float, float* [[t1]], i64 %j
+  call void @foo(float* %3)
+; CHECK: call void @foo(float* [[t2]])
+  ret void
+}
+
+; foo(&a[sext(j)]);
+; foo(&a[sext(i +nsw j)]);
+; foo(&a[sext((i +nsw j) +nsw i)]);
+;   =>
+; t1 = &a[sext(j)];
+; foo(t1);
+; t2 = t1 + sext(i);
+; foo(t2);
+; t3 = t2 + sext(i); // sext(i) should be GVN'ed.
+; foo(t3);
+define void @reassociate_gep_nsw(float* %a, i32 %i, i32 %j) {
+; CHECK-LABEL: @reassociate_gep_nsw(
+  %idxprom.j = sext i32 %j to i64
+  %1 = getelementptr float, float* %a, i64 %idxprom.j
+; CHECK: [[t1:[^ ]+]] = getelementptr float, float* %a, i64 %idxprom.j
+  call void @foo(float* %1)
+; CHECK: call void @foo(float* [[t1]])
+
+  %2 = add nsw i32 %i, %j
+  %idxprom.2 = sext i32 %2 to i64
+  %3 = getelementptr float, float* %a, i64 %idxprom.2
+; CHECK: [[sexti:[^ ]+]] = sext i32 %i to i64
+; CHECK: [[t2:[^ ]+]] = getelementptr float, float* [[t1]], i64 [[sexti]]
+  call void @foo(float* %3)
+; CHECK: call void @foo(float* [[t2]])
+
+  %4 = add nsw i32 %2, %i
+  %idxprom.4 = sext i32 %4 to i64
+  %5 = getelementptr float, float* %a, i64 %idxprom.4
+; CHECK: [[t3:[^ ]+]] = getelementptr float, float* [[t2]], i64 [[sexti]]
+  call void @foo(float* %5)
+; CHECK: call void @foo(float* [[t3]])
+
+  ret void
+}
+
+; assume(j >= 0);
+; foo(&a[zext(j)]);
+; assume(i + j >= 0);
+; foo(&a[zext(i + j)]);
+;   =>
+; t1 = &a[zext(j)];
+; foo(t1);
+; t2 = t1 + sext(i);
+; foo(t2);
+define void @reassociate_gep_assume(float* %a, i32 %i, i32 %j) {
+; CHECK-LABEL: @reassociate_gep_assume(
+  ; assume(j >= 0)
+  %cmp = icmp sgt i32 %j, -1
+  call void @llvm.assume(i1 %cmp)
+  %1 = add i32 %i, %j
+  %cmp2 = icmp sgt i32 %1, -1
+  call void @llvm.assume(i1 %cmp2)
+
+  %idxprom.j = zext i32 %j to i64
+  %2 = getelementptr float, float* %a, i64 %idxprom.j
+; CHECK: [[t1:[^ ]+]] = getelementptr float, float* %a, i64 %idxprom.j
+  call void @foo(float* %2)
+; CHECK: call void @foo(float* [[t1]])
+
+  %idxprom.1 = zext i32 %1 to i64
+  %3 = getelementptr float, float* %a, i64 %idxprom.1
+; CHECK: [[sexti:[^ ]+]] = sext i32 %i to i64
+; CHECK: [[t2:[^ ]+]] = getelementptr float, float* [[t1]], i64 [[sexti]]
+  call void @foo(float* %3)
+; CHECK: call void @foo(float* [[t2]])
+
+  ret void
+}
+
+; Do not split the second GEP because sext(i + j) != sext(i) + sext(j).
+define void @reassociate_gep_no_nsw(float* %a, i32 %i, i32 %j) {
+; CHECK-LABEL: @reassociate_gep_no_nsw(
+  %1 = add i32 %i, %j
+  %2 = getelementptr float, float* %a, i32 %j
+; CHECK: getelementptr float, float* %a, i32 %j
+  call void @foo(float* %2)
+  %3 = getelementptr float, float* %a, i32 %1
+; CHECK: getelementptr float, float* %a, i32 %1
+  call void @foo(float* %3)
+  ret void
+}
+
+define void @reassociate_gep_128(float* %a, i128 %i, i128 %j) {
+; CHECK-LABEL: @reassociate_gep_128(
+  %1 = add i128 %i, %j
+  %2 = getelementptr float, float* %a, i128 %i
+; CHECK: [[t1:[^ ]+]] = getelementptr float, float* %a, i128 %i
+  call void @foo(float* %2)
+; CHECK: call void @foo(float* [[t1]])
+  %3 = getelementptr float, float* %a, i128 %1
+; CHECK: [[truncj:[^ ]+]] = trunc i128 %j to i64
+; CHECK: [[t2:[^ ]+]] = getelementptr float, float* [[t1]], i64 [[truncj]]
+  call void @foo(float* %3)
+; CHECK: call void @foo(float* [[t2]])
+  ret void
+}
+
+%struct.complex = type { float, float }
+
+declare void @bar(%struct.complex*)
+
+define void @different_types(%struct.complex* %input, i64 %i) {
+; CHECK-LABEL: @different_types(
+  %t1 = getelementptr %struct.complex, %struct.complex* %input, i64 %i
+  call void @bar(%struct.complex* %t1)
+  %j = add i64 %i, 5
+  %t2 = getelementptr %struct.complex, %struct.complex* %input, i64 %j, i32 0
+; CHECK: [[cast:[^ ]+]] = bitcast %struct.complex* %t1 to float*
+; CHECK-NEXT: %t2 = getelementptr float, float* [[cast]], i64 10
+; CHECK-NEXT: call void @foo(float* %t2)
+  call void @foo(float* %t2)
+  ret void
+}
+
+declare void @llvm.assume(i1)

Added: llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll (added)
+++ llvm/trunk/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,48 @@
+; RUN: opt < %s -slsr -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -slsr -S | opt -passes='nary-reassociate' -S | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+; foo((a + b) + c);
+; foo((a + b * 2) + c);
+; foo((a + b * 3) + c);
+;   =>
+; abc = (a + b) + c;
+; foo(abc);
+; ab2c = abc + b;
+; foo(ab2c);
+; ab3c = ab2c + b;
+; foo(ab3c);
+define void @nary_reassociate_after_slsr(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @nary_reassociate_after_slsr(
+; PTX-LABEL: .visible .func nary_reassociate_after_slsr(
+; PTX: ld.param.u32 [[b:%r[0-9]+]], [nary_reassociate_after_slsr_param_1];
+  %ab = add i32 %a, %b
+  %abc = add i32 %ab, %c
+  call void @foo(i32 %abc)
+; CHECK: call void @foo(i32 %abc)
+; PTX: st.param.b32 [param0+0], [[abc:%r[0-9]+]];
+
+  %b2 = shl i32 %b, 1
+  %ab2 = add i32 %a, %b2
+  %ab2c = add i32 %ab2, %c
+; CHECK-NEXT: %ab2c = add i32 %abc, %b
+; PTX: add.s32 [[ab2c:%r[0-9]+]], [[abc]], [[b]]
+  call void @foo(i32 %ab2c)
+; CHECK-NEXT: call void @foo(i32 %ab2c)
+; PTX: st.param.b32 [param0+0], [[ab2c]];
+
+  %b3 = mul i32 %b, 3
+  %ab3 = add i32 %a, %b3
+  %ab3c = add i32 %ab3, %c
+; CHECK-NEXT: %ab3c = add i32 %ab2c, %b
+; PTX: add.s32 [[ab3c:%r[0-9]+]], [[ab2c]], [[b]]
+  call void @foo(i32 %ab3c)
+; CHECK-NEXT: call void @foo(i32 %ab3c)
+; PTX: st.param.b32 [param0+0], [[ab3c]];
+
+  ret void
+}
+
+declare void @foo(i32)

Added: llvm/trunk/test/Transforms/NaryReassociate/nary-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NaryReassociate/nary-add.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NaryReassociate/nary-add.ll (added)
+++ llvm/trunk/test/Transforms/NaryReassociate/nary-add.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,212 @@
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+declare void @foo(i32)
+
+; foo(a + c);
+; foo(a + (b + c));
+;   =>
+; t = a + c;
+; foo(t);
+; foo(t + b);
+define void @left_reassociate(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @left_reassociate(
+  %1 = add i32 %a, %c
+; CHECK: [[BASE:%[a-zA-Z0-9]+]] = add i32 %a, %c
+  call void @foo(i32 %1)
+  %2 = add i32 %b, %c
+  %3 = add i32 %a, %2
+; CHECK: [[RESULT:%[a-zA-Z0-9]+]] = add i32 [[BASE]], %b
+  call void @foo(i32 %3)
+; CHECK-NEXT: call void @foo(i32 [[RESULT]])
+  ret void
+}
+
+; foo(a + c);
+; foo((a + b) + c);
+;   =>
+; t = a + c;
+; foo(t);
+; foo(t + b);
+define void @right_reassociate(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @right_reassociate(
+  %1 = add i32 %a, %c
+; CHECK: [[BASE:%[a-zA-Z0-9]+]] = add i32 %a, %c
+  call void @foo(i32 %1)
+  %2 = add i32 %a, %b
+  %3 = add i32 %2, %c
+; CHECK: [[RESULT:%[a-zA-Z0-9]+]] = add i32 [[BASE]], %b
+  call void @foo(i32 %3)
+; CHECK-NEXT: call void @foo(i32 [[RESULT]])
+  ret void
+}
+
+; t1 = a + c;
+; foo(t1);
+; t2 = a + b;
+; foo(t2);
+; t3 = t2 + c;
+; foo(t3);
+;
+; Do not rewrite t3 into t1 + b because t2 is used elsewhere and is likely free.
+define void @no_reassociate(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @no_reassociate(
+  %1 = add i32 %a, %c
+; CHECK: add i32 %a, %c
+  call void @foo(i32 %1)
+  %2 = add i32 %a, %b
+; CHECK: add i32 %a, %b
+  call void @foo(i32 %2)
+  %3 = add i32 %2, %c
+; CHECK: add i32 %2, %c
+  call void @foo(i32 %3)
+  ret void
+}
+
+; if (p1)
+;   foo(a + c);
+; if (p2)
+;   foo(a + c);
+; if (p3)
+;   foo((a + b) + c);
+;
+; No action because (a + c) does not dominate ((a + b) + c).
+define void @conditional(i1 %p1, i1 %p2, i1 %p3, i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @conditional(
+entry:
+  br i1 %p1, label %then1, label %branch1
+
+then1:
+  %0 = add i32 %a, %c
+; CHECK: add i32 %a, %c
+  call void @foo(i32 %0)
+  br label %branch1
+
+branch1:
+  br i1 %p2, label %then2, label %branch2
+
+then2:
+  %1 = add i32 %a, %c
+; CHECK: add i32 %a, %c
+  call void @foo(i32 %1)
+  br label %branch2
+
+branch2:
+  br i1 %p3, label %then3, label %return
+
+then3:
+  %2 = add i32 %a, %b
+; CHECK: %2 = add i32 %a, %b
+  %3 = add i32 %2, %c
+; CHECK: add i32 %2, %c
+  call void @foo(i32 %3)
+  br label %return
+
+return:
+  ret void
+}
+
+; This test involves more conditional reassociation candidates. It exercises
+; the stack optimization in tryReassociatedAdd that pops the candidates that
+; do not dominate the current instruction.
+;
+;       def1
+;      cond1
+;      /  \
+;     /    \
+;   cond2  use2
+;   /  \
+;  /    \
+; def2  def3
+;      cond3
+;       /  \
+;      /    \
+;    def4   use1
+;
+; NaryReassociate should match use1 with def3, and use2 with def1.
+define void @conditional2(i32 %a, i32 %b, i32 %c, i1 %cond1, i1 %cond2, i1 %cond3) {
+entry:
+  %def1 = add i32 %a, %b
+  br i1 %cond1, label %bb1, label %bb6
+bb1:
+  br i1 %cond2, label %bb2, label %bb3
+bb2:
+  %def2 = add i32 %a, %b
+  call void @foo(i32 %def2)
+  ret void
+bb3:
+  %def3 = add i32 %a, %b
+  br i1 %cond3, label %bb4, label %bb5
+bb4:
+  %def4 = add i32 %a, %b
+  call void @foo(i32 %def4)
+  ret void
+bb5:
+  %0 = add i32 %a, %c
+  %1 = add i32 %0, %b
+; CHECK: [[t1:%[0-9]+]] = add i32 %def3, %c
+  call void @foo(i32 %1) ; foo((a + c) + b);
+; CHECK-NEXT: call void @foo(i32 [[t1]])
+  ret void
+bb6:
+  %2 = add i32 %a, %c
+  %3 = add i32 %2, %b
+; CHECK: [[t2:%[0-9]+]] = add i32 %def1, %c
+  call void @foo(i32 %3) ; foo((a + c) + b);
+; CHECK-NEXT: call void @foo(i32 [[t2]])
+  ret void
+}
+
+; foo((a + b) + c)
+; foo(((a + d) + b) + c)
+;   =>
+; t = (a + b) + c;
+; foo(t);
+; foo(t + d);
+define void @quaternary(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @quaternary(
+  %1 = add i32 %a, %b                    ; a + b
+  %2 = add i32 %1, %c                    ; t = (a + b) + c
+  call void @foo(i32 %2)
+; CHECK: call void @foo(i32 [[TMP1:%[a-zA-Z0-9]+]])
+  %3 = add i32 %a, %d                    ; a + d
+  %4 = add i32 %3, %b                    ; (a + d) + b
+  %5 = add i32 %4, %c                    ; ((a + d) + b) + c, expected to become t + d
+; CHECK: [[TMP2:%[a-zA-Z0-9]+]] = add i32 [[TMP1]], %d
+  call void @foo(i32 %5)
+; CHECK: call void @foo(i32 [[TMP2]])
+  ret void
+}
+
+define void @iterative(i32 %a, i32 %b, i32 %c) {
+  %ab = add i32 %a, %b
+  %abc = add i32 %ab, %c
+  call void @foo(i32 %abc)
+
+  %ab2 = add i32 %ab, %b
+  %ab2c = add i32 %ab2, %c
+; CHECK: %ab2c = add i32 %abc, %b
+  call void @foo(i32 %ab2c)
+; CHECK-NEXT: call void @foo(i32 %ab2c)
+
+  %ab3 = add i32 %ab2, %b
+  %ab3c = add i32 %ab3, %c
+; CHECK-NEXT: %ab3c = add i32 %ab2c, %b
+  call void @foo(i32 %ab3c)
+; CHECK-NEXT: call void @foo(i32 %ab3c)
+
+  ret void
+}
+
+define void @avoid_infinite_loop(i32 %a, i32 %b) {
+; CHECK-LABEL: @avoid_infinite_loop
+  %ab = add i32 %a, %b
+; CHECK-NEXT: %ab
+  %ab2 = add i32 %ab, %b
+; CHECK-NEXT: %ab2
+  call void @foo(i32 %ab2)
+; CHECK-NEXT: @foo(i32 %ab2)
+  ret void
+}

Added: llvm/trunk/test/Transforms/NaryReassociate/nary-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NaryReassociate/nary-mul.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NaryReassociate/nary-mul.ll (added)
+++ llvm/trunk/test/Transforms/NaryReassociate/nary-mul.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+declare void @foo(i32)
+
+; CHECK-LABEL: @bar(
+define void @bar(i32 %a, i32 %b, i32 %c) {
+  %1 = mul i32 %a, %c
+; CHECK: [[BASE:%[a-zA-Z0-9]+]] = mul i32 %a, %c
+  call void @foo(i32 %1)
+  %2 = mul i32 %a, %b
+  %3 = mul i32 %2, %c
+; CHECK: [[RESULT:%[a-zA-Z0-9]+]] = mul i32 [[BASE]], %b
+  call void @foo(i32 %3)
+; CHECK-NEXT: call void @foo(i32 [[RESULT]])
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/NaryReassociate/pr24301.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NaryReassociate/pr24301.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NaryReassociate/pr24301.ll (added)
+++ llvm/trunk/test/Transforms/NaryReassociate/pr24301.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s
+
+define i32 @foo(i32 %tmp4) {
+; CHECK-LABEL: @foo(
+entry:
+  %tmp5 = add i32 %tmp4, 8
+  %tmp13 = add i32 %tmp4, -128  ; deleted
+  %tmp14 = add i32 %tmp13, 8    ; => %tmp5 + -128
+  %tmp21 = add i32 119, %tmp4
+  ; do not rewrite %tmp23 against %tmp13 because %tmp13 is already deleted
+  %tmp23 = add i32 %tmp21, -128
+; CHECK: %tmp23 = add i32 %tmp21, -128
+  ret i32 %tmp23
+}

Added: llvm/trunk/test/Transforms/NaryReassociate/pr35710.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NaryReassociate/pr35710.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NaryReassociate/pr35710.ll (added)
+++ llvm/trunk/test/Transforms/NaryReassociate/pr35710.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+
+; This test checks that compilation does not fall into an infinite loop.
+
+define i8 @foo(i8 %v) local_unnamed_addr #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  region.0:
+; CHECK-NEXT:    [[TMP0:%.*]] = mul nsw i8 16, [[V:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw i8 0, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nsw i8 1, [[TMP1]]
+; CHECK-NEXT:    ret i8 [[TMP2]]
+;
+region.0:
+  %0 = mul nsw i8 16, %v
+  %1 = mul nsw i8 0, %0
+  %2 = mul nsw i8 1, %1
+  ret i8 %2
+}

Added: llvm/trunk/test/Transforms/NaryReassociate/pr37539.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NaryReassociate/pr37539.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NaryReassociate/pr37539.ll (added)
+++ llvm/trunk/test/Transforms/NaryReassociate/pr37539.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -nary-reassociate -S -o - | FileCheck %s
+
+; This test checks that compilation does not segfault (see PR37539).
+
+define void @f1() {
+; CHECK-LABEL: @f1(
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:         [[BB1]]
+; CHECK-NEXT:    [[P1:%.*]] = phi i16 [ 0, [[TMP0:%.*]] ], [ [[A1:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[SCEVGEP_OFFS:%.*]] = add i16 2, 0
+; CHECK-NEXT:    [[A1]] = add i16 [[P1]], [[SCEVGEP_OFFS]]
+; CHECK-NEXT:    br i1 false, label %[[BB1]], label %[[BB7:.*]]
+; CHECK:         [[BB7]]
+; CHECK-NEXT:    ret void
+;
+  br label %bb1
+
+bb1:
+  %p1 = phi i16 [ 0, %0 ], [ %a1, %bb1 ]
+  %p2 = phi i16 [ 0, %0 ], [ %a2, %bb1 ]
+  %scevgep.offs = add i16 2, 0
+  %a1 = add i16 %p1, %scevgep.offs
+  %scevgep.offs5 = add i16 2, 0
+  %a2 = add i16 %p2, %scevgep.offs5
+  br i1 false, label %bb1, label %bb7
+
+bb7:
+  ret void
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,86 @@
+; RUN: opt < %s -newgvn | llvm-dis
+
+	%struct.PerlInterpreter = type { i8 }
+@PL_sv_count = external global i32		; <i32*> [#uses=2]
+
+define void @perl_destruct(%struct.PerlInterpreter* %sv_interp) {
+entry:
+	br i1 false, label %cond_next25, label %cond_true16
+
+cond_true16:		; preds = %entry
+	ret void
+
+cond_next25:		; preds = %entry
+	br i1 false, label %cond_next33, label %cond_true32
+
+cond_true32:		; preds = %cond_next25
+	ret void
+
+cond_next33:		; preds = %cond_next25
+	br i1 false, label %cond_next61, label %cond_true.i46
+
+cond_true.i46:		; preds = %cond_next33
+	ret void
+
+cond_next61:		; preds = %cond_next33
+	br i1 false, label %cond_next69, label %cond_true66
+
+cond_true66:		; preds = %cond_next61
+	ret void
+
+cond_next69:		; preds = %cond_next61
+	br i1 false, label %Perl_safefree.exit52, label %cond_true.i50
+
+cond_true.i50:		; preds = %cond_next69
+	ret void
+
+Perl_safefree.exit52:		; preds = %cond_next69
+	br i1 false, label %cond_next80, label %cond_true77
+
+cond_true77:		; preds = %Perl_safefree.exit52
+	ret void
+
+cond_next80:		; preds = %Perl_safefree.exit52
+	br i1 false, label %Perl_safefree.exit56, label %cond_true.i54
+
+cond_true.i54:		; preds = %cond_next80
+	ret void
+
+Perl_safefree.exit56:		; preds = %cond_next80
+	br i1 false, label %Perl_safefree.exit60, label %cond_true.i58
+
+cond_true.i58:		; preds = %Perl_safefree.exit56
+	ret void
+
+Perl_safefree.exit60:		; preds = %Perl_safefree.exit56
+	br i1 false, label %Perl_safefree.exit64, label %cond_true.i62
+
+cond_true.i62:		; preds = %Perl_safefree.exit60
+	ret void
+
+Perl_safefree.exit64:		; preds = %Perl_safefree.exit60
+	br i1 false, label %Perl_safefree.exit68, label %cond_true.i66
+
+cond_true.i66:		; preds = %Perl_safefree.exit64
+	ret void
+
+Perl_safefree.exit68:		; preds = %Perl_safefree.exit64
+	br i1 false, label %cond_next150, label %cond_true23.i
+
+cond_true23.i:		; preds = %Perl_safefree.exit68
+	ret void
+
+cond_next150:		; preds = %Perl_safefree.exit68
+	%tmp16092 = load i32, i32* @PL_sv_count, align 4		; <i32> [#uses=0]
+	br label %cond_next165
+
+bb157:		; preds = %cond_next165
+	%tmp158 = load i32, i32* @PL_sv_count, align 4		; <i32> [#uses=0]
+	br label %cond_next165
+
+cond_next165:		; preds = %bb157, %cond_next150
+	br i1 false, label %bb171, label %bb157
+
+bb171:		; preds = %cond_next165
+	ret void
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+	%struct.INT2 = type { i32, i32 }
+@blkshifts = external global %struct.INT2*		; <%struct.INT2**> [#uses=2]
+
+define i32 @xcompact() {
+entry:
+	store %struct.INT2* null, %struct.INT2** @blkshifts, align 4
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%tmp10 = load %struct.INT2*, %struct.INT2** @blkshifts, align 4		; <%struct.INT2*> [#uses=0]
+; CHECK-NOT:  %tmp10
+	br label %bb
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-25-Loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-25-Loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-25-Loop.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-Loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt < %s -newgvn | llvm-dis
+
+	%struct.s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
+
+define void @print_arch(i8* %arch_file, i32 %route_type, i64 %det_routing_arch.0.0, i64 %det_routing_arch.0.1, i64 %det_routing_arch.0.2, i64 %det_routing_arch.0.3, i64 %det_routing_arch.0.4, %struct.s_segment_inf* %segment_inf, i64 %timing_inf.0.0, i64 %timing_inf.0.1, i64 %timing_inf.0.2, i64 %timing_inf.0.3, i64 %timing_inf.0.4, i32 %timing_inf.1) {
+entry:
+	br i1 false, label %bb278, label %bb344
+
+bb278:		; preds = %bb278, %entry
+	br i1 false, label %bb278, label %bb344
+
+bb344:		; preds = %bb278, %entry
+	%tmp38758 = load i16, i16* null, align 2		; <i16> [#uses=0]
+	ret void
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt < %s -newgvn | llvm-dis
+
+	%struct.TypHeader = type { i32, %struct.TypHeader**, [3 x i8], i8 }
+
+define %struct.TypHeader* @LtRec(%struct.TypHeader* %hdL, %struct.TypHeader* %hdR) {
+entry:
+	br i1 false, label %bb556.preheader, label %bb534.preheader
+
+bb534.preheader:		; preds = %entry
+	ret %struct.TypHeader* null
+
+bb556.preheader:		; preds = %entry
+	%tmp56119 = getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, i32 0, i32 0		; <i32*> [#uses=1]
+	%tmp56220 = load i32, i32* %tmp56119		; <i32> [#uses=0]
+	br i1 false, label %bb.nph23, label %bb675.preheader
+
+bb.nph23:		; preds = %bb556.preheader
+	ret %struct.TypHeader* null
+
+bb656:		; preds = %bb675.outer, %bb656
+	%tmp678 = load i32, i32* %tmp677		; <i32> [#uses=0]
+	br i1 false, label %bb684, label %bb656
+
+bb684:		; preds = %bb675.outer, %bb656
+	br i1 false, label %bb924.preheader, label %bb675.outer
+
+bb675.outer:		; preds = %bb675.preheader, %bb684
+	%tmp67812 = load i32, i32* %tmp67711		; <i32> [#uses=0]
+	br i1 false, label %bb684, label %bb656
+
+bb675.preheader:		; preds = %bb556.preheader
+	%tmp67711 = getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, i32 0, i32 0		; <i32*> [#uses=1]
+	%tmp677 = getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, i32 0, i32 0		; <i32*> [#uses=1]
+	br label %bb675.outer
+
+bb924.preheader:		; preds = %bb684
+	ret %struct.TypHeader* null
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -newgvn | llvm-dis
+
+	%struct.ggBRDF = type { i32 (...)** }
+	%struct.ggBox3 = type { %struct.ggPoint3, %struct.ggPoint3 }
+	%struct.ggMaterialRecord = type { %struct.ggPoint2, %struct.ggBox3, %struct.ggBox3, %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggBRDF*, i32, i32, i32, i32 }
+	%struct.ggONB3 = type { %struct.ggPoint3, %struct.ggPoint3, %struct.ggPoint3 }
+	%struct.ggPoint2 = type { [2 x double] }
+	%struct.ggPoint3 = type { [3 x double] }
+	%struct.ggSpectrum = type { [8 x float] }
+	%struct.mrViewingHitRecord = type { double, %struct.ggPoint3, %struct.ggONB3, %struct.ggPoint2, double, %struct.ggSpectrum, %struct.ggSpectrum, i32, i32, i32, i32 }
+	%struct.mrXEllipticalCylinder = type { %struct.ggBRDF, float, float, float, float, float, float }
+
+define i32 @_ZNK21mrZEllipticalCylinder10viewingHitERK6ggRay3dddR18mrViewingHitRecordR16ggMaterialRecord(%struct.mrXEllipticalCylinder* %this, %struct.ggBox3* %ray, double %unnamed_arg, double %tmin, double %tmax, %struct.mrViewingHitRecord* %VHR, %struct.ggMaterialRecord* %unnamed_arg2) {
+entry:
+	%tmp80.i = getelementptr %struct.mrViewingHitRecord, %struct.mrViewingHitRecord* %VHR, i32 0, i32 1, i32 0, i32 0		; <double*> [#uses=1]
+	store double 0.000000e+00, double* %tmp80.i
+	br i1 false, label %return, label %cond_next.i
+
+cond_next.i:		; preds = %entry
+	br i1 false, label %return, label %cond_true
+
+cond_true:		; preds = %cond_next.i
+	%tmp3.i8 = getelementptr %struct.mrViewingHitRecord, %struct.mrViewingHitRecord* %VHR, i32 0, i32 1, i32 0, i32 0		; <double*> [#uses=1]
+	%tmp46 = load double, double* %tmp3.i8		; <double> [#uses=0]
+	ret i32 1
+
+return:		; preds = %cond_next.i, %entry
+	ret i32 0
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+@last = external global [65 x i32*]
+
+define i32 @NextRootMove(i32 %wtm, i32 %x, i32 %y, i32 %z) {
+entry:
+        %A = alloca i32*
+	%tmp17618 = load i32*, i32** getelementptr ([65 x i32*], [65 x i32*]* @last, i32 0, i32 1), align 4
+        store i32* %tmp17618, i32** %A
+; CHECK: entry:
+; CHECK-NEXT: alloca i32
+; CHECK-NEXT: %tmp17618 = load
+; CHECK-NOT: load
+; CHECK-NOT: phi
+	br label %cond_true116
+
+cond_true116:
+   %cmp = icmp eq i32 %x, %y
+	br i1 %cmp, label %cond_true128, label %cond_true145
+
+cond_true128:
+	%tmp17625 = load i32*, i32** getelementptr ([65 x i32*], [65 x i32*]* @last, i32 0, i32 1), align 4
+        store i32* %tmp17625, i32** %A
+   %cmp1 = icmp eq i32 %x, %z
+	br i1 %cmp1 , label %bb98.backedge, label %return.loopexit
+
+bb98.backedge:
+	br label %cond_true116
+
+cond_true145:
+	%tmp17631 = load i32*, i32** getelementptr ([65 x i32*], [65 x i32*]* @last, i32 0, i32 1), align 4
+        store i32* %tmp17631, i32** %A
+	br i1 false, label %bb98.backedge, label %return.loopexit
+
+return.loopexit:
+	br label %return
+
+return:
+	ret i32 0
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; RUN: opt < %s -newgvn | llvm-dis
+
+@bsLive = external global i32		; <i32*> [#uses=2]
+
+define i32 @bsR(i32 %n) {
+entry:
+	br i1 false, label %cond_next, label %bb19
+
+cond_next:		; preds = %entry
+	store i32 0, i32* @bsLive, align 4
+	br label %bb19
+
+bb19:		; preds = %cond_next, %entry
+	%tmp29 = load i32, i32* @bsLive, align 4		; <i32> [#uses=0]
+	ret i32 0
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -newgvn -S | FileCheck %s
+
+	%struct..0anon = type { i32 }
+	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+	%struct.__sFILEX = type opaque
+	%struct.__sbuf = type { i8*, i32 }
+	%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
+@n_spills = external global i32		; <i32*> [#uses=2]
+
+define i32 @reload(%struct.rtx_def* %first, i32 %global, %struct.FILE* %dumpfile) {
+; CHECK-LABEL: @reload(
+; CHECK-NEXT:  cond_next2835.1:
+; CHECK-NEXT:    br label [[BB2928:%.*]]
+; CHECK:       bb2928:
+; CHECK-NEXT:    br i1 false, label [[COND_NEXT2943:%.*]], label [[COND_TRUE2935:%.*]]
+; CHECK:       cond_true2935:
+; CHECK-NEXT:    br label [[COND_NEXT2943]]
+; CHECK:       cond_next2943:
+; CHECK-NEXT:    br i1 false, label [[BB2982_PREHEADER:%.*]], label [[BB2928]]
+; CHECK:       bb2982.preheader:
+; CHECK-NEXT:    store i8 undef, i8* null
+; CHECK-NEXT:    ret i32 undef
+;
+cond_next2835.1:		; preds = %cond_next2861
+  %tmp2922 = load i32, i32* @n_spills, align 4		; <i32> [#uses=0]
+  br label %bb2928
+
+bb2928:		; preds = %cond_next2835.1, %cond_next2943
+  br i1 false, label %cond_next2943, label %cond_true2935
+
+cond_true2935:		; preds = %bb2928
+  br label %cond_next2943
+
+cond_next2943:		; preds = %cond_true2935, %bb2928
+  br i1 false, label %bb2982.preheader, label %bb2928
+
+bb2982.preheader:		; preds = %cond_next2943
+  %tmp298316 = load i32, i32* @n_spills, align 4		; <i32> [#uses=0]
+  ret i32 %tmp298316
+
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-30-PredIDom.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-30-PredIDom.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-30-PredIDom.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-30-PredIDom.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,274 @@
+; RUN: opt < %s -newgvn | llvm-dis
+
+	%"struct.Block::$_16" = type { i32 }
+	%struct.Exp = type { %struct.Exp_*, i32, i32, i32, %struct.Exp*, %struct.Exp*, %"struct.Exp::$_10", %"struct.Block::$_16", %"struct.Exp::$_12" }
+	%"struct.Exp::$_10" = type { %struct.Exp* }
+	%"struct.Exp::$_12" = type { %struct.Exp** }
+	%struct.Exp_ = type { i32, i32, i32, i32, %struct.Id* }
+	%struct.Id = type { i8*, i32, i32, i32, %"struct.Id::$_13" }
+	%"struct.Id::$_13" = type { double }
+
+define i8* @_ZN3Exp8toStringEj(%struct.Exp* %this, i32 %nextpc) {
+entry:
+	switch i32 0, label %bb970 [		; constant selector 0 matches none of the cases 1-9, so only the default %bb970 is taken
+		 i32 1, label %bb
+		 i32 2, label %bb39
+		 i32 3, label %bb195
+		 i32 4, label %bb270
+		 i32 5, label %bb418
+		 i32 6, label %bb633
+		 i32 7, label %bb810
+		 i32 8, label %bb882
+		 i32 9, label %bb925
+	]
+
+bb:		; preds = %entry
+	store i8* null, i8** null
+	br label %return
+
+bb39:		; preds = %entry
+	br i1 false, label %cond_true, label %cond_false132
+
+cond_true:		; preds = %bb39
+	br i1 false, label %cond_true73, label %cond_false
+
+cond_true73:		; preds = %cond_true
+	br i1 false, label %cond_true108, label %cond_next
+
+cond_true108:		; preds = %cond_true73
+	br label %cond_next
+
+cond_next:		; preds = %cond_true108, %cond_true73
+	br label %cond_next131
+
+cond_false:		; preds = %cond_true
+	br label %cond_next131
+
+cond_next131:		; preds = %cond_false, %cond_next
+	br label %cond_next141
+
+cond_false132:		; preds = %bb39
+	br label %cond_next141
+
+cond_next141:		; preds = %cond_false132, %cond_next131
+	br i1 false, label %cond_true169, label %cond_false175
+
+cond_true169:		; preds = %cond_next141
+	br label %cond_next181
+
+cond_false175:		; preds = %cond_next141
+	br label %cond_next181
+
+cond_next181:		; preds = %cond_false175, %cond_true169
+	br i1 false, label %cond_true189, label %cond_next191
+
+cond_true189:		; preds = %cond_next181
+	br label %cond_next191
+
+cond_next191:		; preds = %cond_true189, %cond_next181
+	store i8* null, i8** null
+	br label %return
+
+bb195:		; preds = %entry
+	br i1 false, label %cond_true248, label %cond_false250
+
+cond_true248:		; preds = %bb195
+	br label %cond_next252
+
+cond_false250:		; preds = %bb195
+	br label %cond_next252
+
+cond_next252:		; preds = %cond_false250, %cond_true248
+	br i1 false, label %cond_true265, label %cond_next267
+
+cond_true265:		; preds = %cond_next252
+	br label %cond_next267
+
+cond_next267:		; preds = %cond_true265, %cond_next252
+	store i8* null, i8** null
+	br label %return
+
+bb270:		; preds = %entry
+	br i1 false, label %cond_true338, label %cond_false340
+
+cond_true338:		; preds = %bb270
+	br label %cond_next342
+
+cond_false340:		; preds = %bb270
+	br label %cond_next342
+
+cond_next342:		; preds = %cond_false340, %cond_true338
+	br i1 false, label %cond_true362, label %cond_false364
+
+cond_true362:		; preds = %cond_next342
+	br label %cond_next366
+
+cond_false364:		; preds = %cond_next342
+	br label %cond_next366
+
+cond_next366:		; preds = %cond_false364, %cond_true362
+	br i1 false, label %cond_true393, label %cond_next395
+
+cond_true393:		; preds = %cond_next366
+	br label %cond_next395
+
+cond_next395:		; preds = %cond_true393, %cond_next366
+	br i1 false, label %cond_true406, label %cond_next408
+
+cond_true406:		; preds = %cond_next395
+	br label %cond_next408
+
+cond_next408:		; preds = %cond_true406, %cond_next395
+	br i1 false, label %cond_true413, label %cond_next415
+
+cond_true413:		; preds = %cond_next408
+	br label %cond_next415
+
+cond_next415:		; preds = %cond_true413, %cond_next408
+	store i8* null, i8** null
+	br label %return
+
+bb418:		; preds = %entry
+	br i1 false, label %cond_true512, label %cond_false514
+
+cond_true512:		; preds = %bb418
+	br label %cond_next516
+
+cond_false514:		; preds = %bb418
+	br label %cond_next516
+
+cond_next516:		; preds = %cond_false514, %cond_true512
+	br i1 false, label %cond_true536, label %cond_false538
+
+cond_true536:		; preds = %cond_next516
+	br label %cond_next540
+
+cond_false538:		; preds = %cond_next516
+	br label %cond_next540
+
+cond_next540:		; preds = %cond_false538, %cond_true536
+	br i1 false, label %cond_true560, label %cond_false562
+
+cond_true560:		; preds = %cond_next540
+	br label %cond_next564
+
+cond_false562:		; preds = %cond_next540
+	br label %cond_next564
+
+cond_next564:		; preds = %cond_false562, %cond_true560
+	br i1 false, label %cond_true597, label %cond_next599
+
+cond_true597:		; preds = %cond_next564
+	br label %cond_next599
+
+cond_next599:		; preds = %cond_true597, %cond_next564
+	br i1 false, label %cond_true614, label %cond_next616
+
+cond_true614:		; preds = %cond_next599
+	br label %cond_next616
+
+cond_next616:		; preds = %cond_true614, %cond_next599
+	br i1 false, label %cond_true621, label %cond_next623
+
+cond_true621:		; preds = %cond_next616
+	br label %cond_next623
+
+cond_next623:		; preds = %cond_true621, %cond_next616
+	br i1 false, label %cond_true628, label %cond_next630
+
+cond_true628:		; preds = %cond_next623
+	br label %cond_next630
+
+cond_next630:		; preds = %cond_true628, %cond_next623
+	store i8* null, i8** null
+	br label %return
+
+bb633:		; preds = %entry
+	br i1 false, label %cond_true667, label %cond_next669
+
+cond_true667:		; preds = %bb633
+	br label %cond_next669
+
+cond_next669:		; preds = %cond_true667, %bb633
+	br i1 false, label %cond_true678, label %cond_next791
+
+cond_true678:		; preds = %cond_next669
+	br label %bb735
+
+bb679:		; preds = %bb735
+	br i1 false, label %cond_true729, label %cond_next731
+
+cond_true729:		; preds = %bb679
+	br label %cond_next731
+
+cond_next731:		; preds = %cond_true729, %bb679
+	br label %bb735
+
+bb735:		; preds = %cond_next731, %cond_true678
+	br i1 false, label %bb679, label %bb743
+
+bb743:		; preds = %bb735
+	br i1 false, label %cond_true788, label %cond_next790
+
+cond_true788:		; preds = %bb743
+	br label %cond_next790
+
+cond_next790:		; preds = %cond_true788, %bb743
+	br label %cond_next791
+
+cond_next791:		; preds = %cond_next790, %cond_next669
+	br i1 false, label %cond_true805, label %cond_next807
+
+cond_true805:		; preds = %cond_next791
+	br label %cond_next807
+
+cond_next807:		; preds = %cond_true805, %cond_next791
+	store i8* null, i8** null
+	br label %return
+
+bb810:		; preds = %entry
+	br i1 false, label %cond_true870, label %cond_next872
+
+cond_true870:		; preds = %bb810
+	br label %cond_next872
+
+cond_next872:		; preds = %cond_true870, %bb810
+	br i1 false, label %cond_true877, label %cond_next879
+
+cond_true877:		; preds = %cond_next872
+	br label %cond_next879
+
+cond_next879:		; preds = %cond_true877, %cond_next872
+	store i8* null, i8** null
+	br label %return
+
+bb882:		; preds = %entry
+	br i1 false, label %cond_true920, label %cond_next922
+
+cond_true920:		; preds = %bb882
+	br label %cond_next922
+
+cond_next922:		; preds = %cond_true920, %bb882
+	store i8* null, i8** null
+	br label %return
+
+bb925:		; preds = %entry
+	br i1 false, label %cond_true965, label %cond_next967
+
+cond_true965:		; preds = %bb925
+	br label %cond_next967
+
+cond_next967:		; preds = %cond_true965, %bb925
+	store i8* null, i8** null
+	br label %return
+
+bb970:		; preds = %entry
+	unreachable
+		; No predecessors! The two instructions below follow a terminator, so they form an implicit unnamed block (the %0 in the preds note on %return).
+	store i8* null, i8** null
+	br label %return
+
+return:		; preds = %0, %cond_next967, %cond_next922, %cond_next879, %cond_next807, %cond_next630, %cond_next415, %cond_next267, %cond_next191, %bb
+	%retval980 = load i8*, i8** null		; <i8*> [#uses=1]
+	ret i8* %retval980
+}

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,315 @@
+; XFAIL: *
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+	%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
+@debug = external constant i32		; <i32*> [#uses=0]
+@counters = external constant i32		; <i32*> [#uses=1]
+@trialx = external global [17 x i32]		; <[17 x i32]*> [#uses=1]
+@dummy1 = external global [7 x i32]		; <[7 x i32]*> [#uses=0]
+@dummy2 = external global [4 x i32]		; <[4 x i32]*> [#uses=0]
+@unacceptable = external global i32		; <i32*> [#uses=0]
+@isa = external global [13 x %struct.anon]		; <[13 x %struct.anon]*> [#uses=3]
+@.str = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str1 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str2 = external constant [1 x i8]		; <[1 x i8]*> [#uses=0]
+@.str3 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str4 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str5 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str6 = external constant [2 x i8]		; <[2 x i8]*> [#uses=0]
+@.str7 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str8 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str9 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str10 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str11 = external constant [2 x i8]		; <[2 x i8]*> [#uses=0]
+@.str12 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str13 = external constant [2 x i8]		; <[2 x i8]*> [#uses=0]
+@.str14 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@.str15 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@.str16 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str17 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str18 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str19 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str20 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str21 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str22 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str23 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@.str24 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str25 = external constant [6 x i8]		; <[6 x i8]*> [#uses=0]
+@.str26 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@.str27 = external constant [6 x i8]		; <[6 x i8]*> [#uses=0]
+@r = external global [17 x i32]		; <[17 x i32]*> [#uses=0]
+@.str28 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str29 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@pgm = external global [5 x { i32, [3 x i32] }]		; <[5 x { i32, [3 x i32] }]*> [#uses=4]
+@.str30 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str31 = external constant [13 x i8]		; <[13 x i8]*> [#uses=0]
+@.str32 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str33 = external constant [4 x i8]		; <[4 x i8]*> [#uses=0]
+@.str34 = external constant [20 x i8]		; <[20 x i8]*> [#uses=0]
+@numi = external global i32		; <i32*> [#uses=7]
+@.str35 = external constant [10 x i8]		; <[10 x i8]*> [#uses=0]
+@counter = external global [5 x i32]		; <[5 x i32]*> [#uses=2]
+@itrialx.2510 = external global i32		; <i32*> [#uses=0]
+@.str36 = external constant [43 x i8]		; <[43 x i8]*> [#uses=0]
+@.str37 = external constant [42 x i8]		; <[42 x i8]*> [#uses=0]
+@corr_result = external global i32		; <i32*> [#uses=0]
+@.str38 = external constant [3 x i8]		; <[3 x i8]*> [#uses=0]
+@.str39 = external constant [5 x i8]		; <[5 x i8]*> [#uses=0]
+@.str40 = external constant [47 x i8]		; <[47 x i8]*> [#uses=0]
+@correct_result = external global [17 x i32]		; <[17 x i32]*> [#uses=1]
+@.str41 = external constant [46 x i8]		; <[46 x i8]*> [#uses=0]
+@.str42 = external constant [32 x i8]		; <[32 x i8]*> [#uses=0]
+@.str43 = external constant [44 x i8]		; <[44 x i8]*> [#uses=1]
+@.str44 = external constant [21 x i8]		; <[21 x i8]*> [#uses=1]
+@.str45 = external constant [12 x i8]		; <[12 x i8]*> [#uses=1]
+@.str46 = external constant [5 x i8]		; <[5 x i8]*> [#uses=1]
+@.str47 = external constant [12 x i8]		; <[12 x i8]*> [#uses=1]
+
+declare i32 @neg(i32, i32, i32)
+
+declare i32 @Not(i32, i32, i32)
+
+declare i32 @pop(i32, i32, i32)
+
+declare i32 @nlz(i32, i32, i32)
+
+declare i32 @rev(i32, i32, i32)
+
+declare i32 @add(i32, i32, i32)
+
+declare i32 @sub(i32, i32, i32)
+
+declare i32 @mul(i32, i32, i32)
+
+declare i32 @divide(i32, i32, i32)
+
+declare i32 @divu(i32, i32, i32)
+
+declare i32 @And(i32, i32, i32)
+
+declare i32 @Or(i32, i32, i32)
+
+declare i32 @Xor(i32, i32, i32)
+
+declare i32 @rotl(i32, i32, i32)
+
+declare i32 @shl(i32, i32, i32)
+
+declare i32 @shr(i32, i32, i32)
+
+declare i32 @shrs(i32, i32, i32)
+
+declare i32 @cmpeq(i32, i32, i32)
+
+declare i32 @cmplt(i32, i32, i32)
+
+declare i32 @cmpltu(i32, i32, i32)
+
+declare i32 @seleq(i32, i32, i32)
+
+declare i32 @sellt(i32, i32, i32)
+
+declare i32 @selle(i32, i32, i32)
+
+declare void @print_expr(i32)
+
+declare i32 @printf(i8*, ...)
+
+declare i32 @putchar(i32)
+
+declare void @print_pgm()
+
+declare void @simulate_one_instruction(i32)
+
+declare i32 @check(i32)
+
+declare i32 @puts(i8*)
+
+declare void @fix_operands(i32)
+
+declare void @abort()
+
+declare i32 @increment()
+
+declare i32 @search()
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+	%argc_addr = alloca i32		; <i32*> [#uses=1]
+	%argv_addr = alloca i8**		; <i8***> [#uses=1]
+	%retval = alloca i32, align 4		; <i32*> [#uses=2]
+	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
+	%i = alloca i32, align 4		; <i32*> [#uses=21]
+	%num_sol = alloca i32, align 4		; <i32*> [#uses=4]
+	%total = alloca i32, align 4		; <i32*> [#uses=4]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store i32 %argc, i32* %argc_addr
+	store i8** %argv, i8*** %argv_addr
+	store i32 0, i32* %num_sol
+	store i32 1, i32* @numi
+	br label %bb91
+
+bb:		; preds = %cond_next97
+	%tmp1 = load i32, i32* @numi		; <i32> [#uses=1]
+	%tmp2 = getelementptr [44 x i8], [44 x i8]* @.str43, i32 0, i32 0		; <i8*> [#uses=1]
+	%tmp3 = call i32 (i8*, ...) @printf( i8* %tmp2, i32 %tmp1 )		; <i32> [#uses=0]
+	store i32 0, i32* %i
+	br label %bb13
+
+bb4:		; preds = %bb13
+	%tmp5 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp6 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp7 = getelementptr [17 x i32], [17 x i32]* @trialx, i32 0, i32 %tmp6		; <i32*> [#uses=1]
+	%tmp8 = load i32, i32* %tmp7		; <i32> [#uses=1]
+	%tmp9 = call i32 @userfun( i32 %tmp8 )		; <i32> [#uses=1]
+	%tmp10 = getelementptr [17 x i32], [17 x i32]* @correct_result, i32 0, i32 %tmp5		; <i32*> [#uses=1]
+	store i32 %tmp9, i32* %tmp10
+	%tmp11 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp12 = add i32 %tmp11, 1		; <i32> [#uses=1]
+	store i32 %tmp12, i32* %i
+	br label %bb13
+
+bb13:		; preds = %bb4, %bb; loop header: runs %bb4 while the value stored in %i is <= 16
+	%tmp14 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp15 = icmp sle i32 %tmp14, 16		; <i1> [#uses=1]
+	%tmp1516 = zext i1 %tmp15 to i32		; <i32> [#uses=1]
+	%toBool = icmp ne i32 %tmp1516, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %bb4, label %bb17
+
+bb17:		; preds = %bb13
+	store i32 0, i32* %i
+	br label %bb49
+
+bb18:		; preds = %bb49
+	%tmp19 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp20 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp19		; <{ i32, [3 x i32] }*> [#uses=1]
+	%tmp21 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp20, i32 0, i32 0		; <i32*> [#uses=1]
+	store i32 0, i32* %tmp21
+	%tmp22 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp23 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0		; <%struct.anon*> [#uses=1]
+	%tmp24 = getelementptr %struct.anon, %struct.anon* %tmp23, i32 0, i32 3		; <[3 x i32]*> [#uses=1]
+	%tmp25 = getelementptr [3 x i32], [3 x i32]* %tmp24, i32 0, i32 0		; <i32*> [#uses=1]
+	%tmp26 = load i32, i32* %tmp25		; <i32> [#uses=1]
+	%tmp27 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp22		; <{ i32, [3 x i32] }*> [#uses=1]
+	%tmp28 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp27, i32 0, i32 1		; <[3 x i32]*> [#uses=1]
+	%tmp29 = getelementptr [3 x i32], [3 x i32]* %tmp28, i32 0, i32 0		; <i32*> [#uses=1]
+	store i32 %tmp26, i32* %tmp29
+	%tmp30 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp31 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0		; <%struct.anon*> [#uses=1]
+	%tmp32 = getelementptr %struct.anon, %struct.anon* %tmp31, i32 0, i32 3		; <[3 x i32]*> [#uses=1]
+	%tmp33 = getelementptr [3 x i32], [3 x i32]* %tmp32, i32 0, i32 1		; <i32*> [#uses=1]
+	%tmp34 = load i32, i32* %tmp33		; <i32> [#uses=1]
+	%tmp35 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp30		; <{ i32, [3 x i32] }*> [#uses=1]
+	%tmp36 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp35, i32 0, i32 1		; <[3 x i32]*> [#uses=1]
+	%tmp37 = getelementptr [3 x i32], [3 x i32]* %tmp36, i32 0, i32 1		; <i32*> [#uses=1]
+	store i32 %tmp34, i32* %tmp37
+	%tmp38 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp39 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0		; <%struct.anon*> [#uses=1]
+	%tmp40 = getelementptr %struct.anon, %struct.anon* %tmp39, i32 0, i32 3		; <[3 x i32]*> [#uses=1]
+	%tmp41 = getelementptr [3 x i32], [3 x i32]* %tmp40, i32 0, i32 2		; <i32*> [#uses=1]
+	%tmp42 = load i32, i32* %tmp41		; <i32> [#uses=1]
+	%tmp43 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp38		; <{ i32, [3 x i32] }*> [#uses=1]
+	%tmp44 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp43, i32 0, i32 1		; <[3 x i32]*> [#uses=1]
+	%tmp45 = getelementptr [3 x i32], [3 x i32]* %tmp44, i32 0, i32 2		; <i32*> [#uses=1]
+	store i32 %tmp42, i32* %tmp45
+	%tmp46 = load i32, i32* %i		; <i32> [#uses=1]
+	call void @fix_operands( i32 %tmp46 )
+	%tmp47 = load i32, i32* %i		; <i32> [#uses=1]; the check line below expects the output to define %tmp47 as a phi of %tmp48 (from %bb18) and 0 (from %bb17) instead of this load
+; CHECK: %tmp47 = phi i32 [ %tmp48, %bb18 ], [ 0, %bb17 ]
+	%tmp48 = add i32 %tmp47, 1		; <i32> [#uses=1]
+	store i32 %tmp48, i32* %i
+	br label %bb49
+
+bb49:		; preds = %bb18, %bb17; loop header: runs %bb18 while the value stored in %i is < @numi
+	%tmp50 = load i32, i32* @numi		; <i32> [#uses=1]
+	%tmp51 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp52 = icmp slt i32 %tmp51, %tmp50		; <i1> [#uses=1]
+	%tmp5253 = zext i1 %tmp52 to i32		; <i32> [#uses=1]
+	%toBool54 = icmp ne i32 %tmp5253, 0		; <i1> [#uses=1]
+	br i1 %toBool54, label %bb18, label %bb55
+
+bb55:		; preds = %bb49
+	%tmp56 = call i32 @search( )		; <i32> [#uses=1]
+	store i32 %tmp56, i32* %num_sol
+	%tmp57 = getelementptr [21 x i8], [21 x i8]* @.str44, i32 0, i32 0		; <i8*> [#uses=1]
+	%tmp58 = load i32, i32* %num_sol		; <i32> [#uses=1]
+	%tmp59 = call i32 (i8*, ...) @printf( i8* %tmp57, i32 %tmp58 )		; <i32> [#uses=0]
+	%tmp60 = load i32, i32* @counters		; <i32> [#uses=1]
+	%tmp61 = icmp ne i32 %tmp60, 0		; <i1> [#uses=1]
+	%tmp6162 = zext i1 %tmp61 to i32		; <i32> [#uses=1]
+	%toBool63 = icmp ne i32 %tmp6162, 0		; <i1> [#uses=1]
+	br i1 %toBool63, label %cond_true, label %cond_next
+
+cond_true:		; preds = %bb55
+	store i32 0, i32* %total
+	%tmp64 = getelementptr [12 x i8], [12 x i8]* @.str45, i32 0, i32 0		; <i8*> [#uses=1]
+	%tmp65 = call i32 (i8*, ...) @printf( i8* %tmp64 )		; <i32> [#uses=0]
+	store i32 0, i32* %i
+	br label %bb79
+
+bb66:		; preds = %bb79
+	%tmp67 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp68 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp67		; <i32*> [#uses=1]
+	%tmp69 = load i32, i32* %tmp68		; <i32> [#uses=1]
+	%tmp70 = getelementptr [5 x i8], [5 x i8]* @.str46, i32 0, i32 0		; <i8*> [#uses=1]
+	%tmp71 = call i32 (i8*, ...) @printf( i8* %tmp70, i32 %tmp69 )		; <i32> [#uses=0]
+	%tmp72 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp73 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp72		; <i32*> [#uses=1]
+	%tmp74 = load i32, i32* %tmp73		; <i32> [#uses=1]
+	%tmp75 = load i32, i32* %total		; <i32> [#uses=1]
+	%tmp76 = add i32 %tmp74, %tmp75		; <i32> [#uses=1]
+	store i32 %tmp76, i32* %total
+	%tmp77 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp78 = add i32 %tmp77, 1		; <i32> [#uses=1]
+	store i32 %tmp78, i32* %i
+	br label %bb79
+
+bb79:		; preds = %bb66, %cond_true; loop header: runs %bb66 while the value stored in %i is < @numi
+	%tmp80 = load i32, i32* @numi		; <i32> [#uses=1]
+	%tmp81 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp82 = icmp slt i32 %tmp81, %tmp80		; <i1> [#uses=1]
+	%tmp8283 = zext i1 %tmp82 to i32		; <i32> [#uses=1]
+	%toBool84 = icmp ne i32 %tmp8283, 0		; <i1> [#uses=1]
+	br i1 %toBool84, label %bb66, label %bb85
+
+bb85:		; preds = %bb79
+	%tmp86 = getelementptr [12 x i8], [12 x i8]* @.str47, i32 0, i32 0		; <i8*> [#uses=1]
+	%tmp87 = load i32, i32* %total		; <i32> [#uses=1]
+	%tmp88 = call i32 (i8*, ...) @printf( i8* %tmp86, i32 %tmp87 )		; <i32> [#uses=0]
+	br label %cond_next
+
+cond_next:		; preds = %bb85, %bb55
+	%tmp89 = load i32, i32* @numi		; <i32> [#uses=1]
+	%tmp90 = add i32 %tmp89, 1		; <i32> [#uses=1]
+	store i32 %tmp90, i32* @numi
+	br label %bb91
+
+bb91:		; preds = %cond_next, %entry; outer loop header: leaves for %bb102 once @numi > 5
+	%tmp92 = load i32, i32* @numi		; <i32> [#uses=1]
+	%tmp93 = icmp sgt i32 %tmp92, 5		; <i1> [#uses=1]
+	%tmp9394 = zext i1 %tmp93 to i32		; <i32> [#uses=1]
+	%toBool95 = icmp ne i32 %tmp9394, 0		; <i1> [#uses=1]
+	br i1 %toBool95, label %cond_true96, label %cond_next97
+
+cond_true96:		; preds = %bb91
+	br label %bb102
+
+cond_next97:		; preds = %bb91; continues the outer loop at %bb only while %num_sol still holds 0
+	%tmp98 = load i32, i32* %num_sol		; <i32> [#uses=1]
+	%tmp99 = icmp eq i32 %tmp98, 0		; <i1> [#uses=1]
+	%tmp99100 = zext i1 %tmp99 to i32		; <i32> [#uses=1]
+	%toBool101 = icmp ne i32 %tmp99100, 0		; <i1> [#uses=1]
+	br i1 %toBool101, label %bb, label %bb102
+
+bb102:		; preds = %cond_next97, %cond_true96
+	store i32 0, i32* %tmp
+	%tmp103 = load i32, i32* %tmp		; <i32> [#uses=1]
+	store i32 %tmp103, i32* %retval
+	br label %return
+
+return:		; preds = %bb102
+	%retval104 = load i32, i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval104
+}
+
+declare i32 @userfun(i32)

Added: llvm/trunk/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+@img_width = external global i16		; <i16*> [#uses=2]
+
+define i32 @smpUMHEXBipredIntegerPelBlockMotionSearch(i16* %cur_pic, i16 signext  %ref, i32 %list, i32 %pic_pix_x, i32 %pic_pix_y, i32 %blocktype, i16 signext  %pred_mv_x1, i16 signext  %pred_mv_y1, i16 signext  %pred_mv_x2, i16 signext  %pred_mv_y2, i16* %mv_x, i16* %mv_y, i16* %s_mv_x, i16* %s_mv_y, i32 %search_range, i32 %min_mcost, i32 %lambda_factor) {
+cond_next143:		; entry block of the reduced function (no predecessors)
+	store i16 0, i16* @img_width, align 2
+	br i1 false, label %cond_next449, label %cond_false434
+
+cond_false434:		; preds = %cond_next143
+	br label %cond_next449
+
+cond_next449:		; preds = %cond_false434, %cond_next143
+	br i1 false, label %cond_next698, label %cond_false470
+
+cond_false470:		; preds = %cond_next449
+	br label %cond_next698
+
+cond_next698:		; preds = %cond_false470, %cond_next449
+	%tmp701 = load i16, i16* @img_width, align 2		; <i16> [#uses=0]; no store sits between the entry-block store to @img_width and this unused load, which the directive below expects to be gone
+; CHECK-NOT: %tmp701 =
+	ret i32 0
+}

Added: llvm/trunk/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; XFAIL: *
+; RUN: opt < %s -newgvn -S | FileCheck %s
+; PR1996
+
+%struct.anon = type { i32, i8, i8, i8, i8 }
+
+define i32 @a() {
+entry:
+        %c = alloca %struct.anon                ; <%struct.anon*> [#uses=2]
+        %tmp = getelementptr %struct.anon, %struct.anon* %c, i32 0, i32 0             ; <i32*> [#uses=1]
+        %tmp1 = getelementptr i32, i32* %tmp, i32 1          ; <i32*> [#uses=2]; one i32 past field 0 of %c
+        %tmp2 = load i32, i32* %tmp1, align 4                ; <i32> [#uses=1]; nothing has been stored to %c before this load
+; CHECK-NOT: load
+        %tmp3 = or i32 %tmp2, 11                ; <i32> [#uses=1]
+        %tmp4 = and i32 %tmp3, -21              ; <i32> [#uses=1]
+        store i32 %tmp4, i32* %tmp1, align 4
+        %call = call i32 (...) @x( %struct.anon* %c )          ; <i32> [#uses=0]
+        ret i32 undef
+}
+
+
+declare i32 @x(...)

Added: llvm/trunk/test/Transforms/NewGVN/2008-02-13-NewPHI.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2008-02-13-NewPHI.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2008-02-13-NewPHI.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2008-02-13-NewPHI.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt < %s -newgvn
+; PR2032
+
+define i32 @sscal(i32 %n, double %sa1, float* %sx, i32 %incx) {
+entry:
+	%sx_addr = alloca float*		; <float**> [#uses=3]
+	store float* %sx, float** %sx_addr, align 4
+	br label %bb33
+
+bb:		; preds = %bb33
+	%tmp27 = load float*, float** %sx_addr, align 4		; <float*> [#uses=1]; reaches here from %entry's store of %sx or, around the loop, from the store of null below
+	store float 0.000000e+00, float* %tmp27, align 4
+	store float* null, float** %sx_addr, align 4
+	br label %bb33
+
+bb33:		; preds = %bb, %entry
+	br i1 false, label %bb, label %return		; constant-false condition: control always goes to %return
+
+return:		; preds = %bb33
+	%retval59 = load i32, i32* null, align 4		; <i32> [#uses=1]; loads through a null pointer (left over from test reduction)
+	ret i32 %retval59
+}

Added: llvm/trunk/test/Transforms/NewGVN/2008-07-02-Unreachable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2008-07-02-Unreachable.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2008-07-02-Unreachable.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2008-07-02-Unreachable.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+; PR2503
+
+@g_3 = external global i8		; <i8*> [#uses=2]
+
+define i8 @func_1(i32 %x, i32 %y) nounwind  {
+entry:
+  %A = alloca i8
+    %cmp = icmp eq i32 %x, %y
+	br i1 %cmp, label %ifelse, label %ifthen
+
+ifthen:		; preds = %entry
+	br label %ifend
+
+ifelse:		; preds = %entry
+	%tmp3 = load i8, i8* @g_3		; <i8> [#uses=1] (the store below)
+        store i8 %tmp3, i8* %A
+	br label %afterfor
+
+forcond:		; preds = %forinc; %forcond/%forbody/%forinc only reach each other, so none of them is reachable from %entry
+	br i1 false, label %afterfor, label %forbody
+
+forbody:		; preds = %forcond
+	br label %forinc
+
+forinc:		; preds = %forbody
+	br label %forcond
+
+afterfor:		; preds = %forcond, %ifelse
+	%tmp10 = load i8, i8* @g_3		; <i8> [#uses=1] (the ret below); same address as the %tmp3 load in %ifelse
+	ret i8 %tmp10
+; CHECK: ret i8 %tmp3
+
+ifend:		; preds = %ifthen
+	ret i8 0
+}

Added: llvm/trunk/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt < %s -newgvn -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.5"
+	%struct.anon = type { i8*, i32 }
+	%struct.d_print_info = type { i32, i8*, i32, i32, %struct.d_print_template*, %struct.d_print_mod*, i32 }
+	%struct.d_print_mod = type { %struct.d_print_mod*, %struct.demangle_component*, i32, %struct.d_print_template* }
+	%struct.d_print_template = type { %struct.d_print_template*, %struct.demangle_component* }
+	%struct.demangle_component = type { i32, { %struct.anon } }
+
+define void @d_print_mod_list(%struct.d_print_info* %dpi, %struct.d_print_mod* %mods, i32 %suffix) nounwind {
+entry:
+	%0 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1		; <i8**> [#uses=1]
+	br i1 false, label %return, label %bb		; constant-false condition: control always goes to %bb
+
+bb:		; preds = %entry
+	%1 = load i8*, i8** %0, align 4		; <i8*> [#uses=0]
+	%2 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1		; <i8**> [#uses=0]; same operands as %0 in %entry
+	br label %bb21
+
+bb21:		; preds = %bb21, %bb
+	br label %bb21		; unconditional self-loop with no exit
+
+return:		; preds = %entry
+	ret void
+}
+
+; CHECK: define void @d_print_mod_list(%struct.d_print_info* %dpi, %struct.d_print_mod* %mods, i32 %suffix) #0 {
+; CHECK: entry:
+; CHECK:   %0 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1
+; CHECK:   br i1 false, label %return, label %bb
+; CHECK: bb:
+; CHECK:   br label %bb21
+; CHECK: bb21:
+; CHECK:   br label %bb21
+; CHECK: return:
+; CHECK:   ret void
+; CHECK: }

Added: llvm/trunk/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; RUN: opt < %s -newgvn | llvm-dis
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+	br label %bb84
+
+bb41:		; preds = %bb82
+	%tmp = load i8, i8* %opt.0, align 1		; <i8> [#uses=0]
+	%tmp1 = getelementptr i8, i8* %opt.0, i32 1		; <i8*> [#uses=2]
+	switch i32 0, label %bb81 [		; constant selector 0 matches none of the cases, so only the default %bb81 is taken
+		i32 102, label %bb82
+		i32 110, label %bb79
+		i32 118, label %bb80
+	]
+
+bb79:		; preds = %bb41
+	br label %bb82
+
+bb80:		; preds = %bb41
+	ret i32 0
+
+bb81:		; preds = %bb41
+	ret i32 1
+
+bb82:		; preds = %bb84, %bb79, %bb41
+	%opt.0 = phi i8* [ %tmp3, %bb84 ], [ %tmp1, %bb79 ], [ %tmp1, %bb41 ]		; <i8*> [#uses=3]
+	%tmp2 = load i8, i8* %opt.0, align 1		; <i8> [#uses=0]
+	br i1 false, label %bb84, label %bb41		; constant-false condition: control always goes to %bb41
+
+bb84:		; preds = %bb82, %entry
+	%tmp3 = getelementptr i8, i8* null, i32 1		; <i8*> [#uses=1]
+	br label %bb82
+}

Added: llvm/trunk/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt < %s -newgvn | llvm-dis
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+@sort_value = external global [256 x i32], align 32		; <[256 x i32]*> [#uses=2]
+
+define i32 @Quiesce(i32 %alpha, i32 %beta, i32 %wtm, i32 %ply) nounwind {
+entry:
+	br label %bb22
+
+bb22:		; preds = %bb23, %bb22, %entry
+	br i1 false, label %bb23, label %bb22		; constant-false condition: %bb22 always branches back to itself
+
+bb23:		; preds = %bb23, %bb22
+	%sortv.233 = phi i32* [ getelementptr ([256 x i32], [256 x i32]* @sort_value, i32 0, i32 0), %bb22 ], [ %sortv.2, %bb23 ]		; <i32*> [#uses=1]
+	%0 = load i32, i32* %sortv.233, align 4		; <i32> [#uses=0]
+	%sortv.2 = getelementptr [256 x i32], [256 x i32]* @sort_value, i32 0, i32 0		; <i32*> [#uses=1]; same base and indices as the constant GEP in the phi above
+	br i1 false, label %bb23, label %bb22
+}

Added: llvm/trunk/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -newgvn | llvm-dis
+; Cached results must be added to and verified against the visited sets.
+; PR3217
+
+define fastcc void @gen_field_die(i32* %decl) nounwind {		; every path ends in unreachable; neither load result is used
+entry:
+	br i1 false, label %bb203, label %bb202
+
+bb202:		; preds = %entry
+	unreachable
+
+bb203:		; preds = %entry
+	%tmp = getelementptr i32, i32* %decl, i32 1		; <i32*> [#uses=1]
+	%tmp1 = load i32, i32* %tmp, align 4		; <i32> [#uses=0]; first load of the element at index 1 of %decl
+	br i1 false, label %bb207, label %bb204
+
+bb204:		; preds = %bb203
+	%tmp2 = getelementptr i32, i32* %decl, i32 1		; <i32*> [#uses=1]; same GEP operands as %tmp in bb203
+	br label %bb208
+
+bb207:		; preds = %bb203
+	br label %bb208
+
+bb208:		; preds = %bb207, %bb204
+	%iftmp.1374.0.in = phi i32* [ null, %bb207 ], [ %tmp2, %bb204 ]		; <i32*> [#uses=1]; null when arriving from bb207, %tmp2 when arriving from bb204
+	%iftmp.1374.0 = load i32, i32* %iftmp.1374.0.in		; <i32> [#uses=0]; on the bb204 path this reads the same address as %tmp1
+	unreachable
+}

Added: llvm/trunk/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt < %s -newgvn | llvm-dis
+; PR3358
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+	%struct.re_pattern_buffer = type { i8*, i64, i64, i64, i8*, i8*, i64, i8 }
+	%struct.re_registers = type { i32, i32*, i32* }
+
+define fastcc i32 @byte_re_match_2_internal(%struct.re_pattern_buffer* nocapture %bufp, i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %pos, %struct.re_registers* %regs, i32 %stop) nounwind {		; none of the parameters is referenced in the body
+entry:
+	br label %bb159
+
+succeed_label:		; preds = %bb159
+	ret i32 0		; the only return in the function
+
+bb159:		; preds = %bb664, %bb554, %bb159, %bb159, %bb159, %entry
+	%d.0 = phi i8* [ null, %entry ], [ %d.0, %bb159 ], [ %d.0, %bb554 ], [ %d.0, %bb159 ], [ %d.0, %bb159 ], [ %d.12, %bb664 ]		; <i8*> [#uses=5]; four of the five uses are this phi's own self-references, the fifth is the load in bb552
+	switch i32 0, label %bb661 [
+		i32 0, label %bb159
+		i32 1, label %succeed_label
+		i32 13, label %bb159
+		i32 14, label %bb159
+		i32 16, label %bb411
+		i32 24, label %bb622
+		i32 28, label %bb543
+	]
+
+bb411:		; preds = %bb411, %bb159
+	br label %bb411		; unconditional self-loop
+
+bb543:		; preds = %bb159
+	br i1 false, label %bb549, label %bb550
+
+bb549:		; preds = %bb543
+	br label %bb554
+
+bb550:		; preds = %bb543
+	br i1 false, label %bb554, label %bb552
+
+bb552:		; preds = %bb550
+	%0 = load i8, i8* %d.0, align 8		; <i8> [#uses=0]; the only memory access in the function; its result is unused
+	br label %bb554
+
+bb554:		; preds = %bb552, %bb550, %bb549
+	br i1 false, label %bb159, label %bb661
+
+bb622:		; preds = %bb622, %bb159
+	br label %bb622		; unconditional self-loop
+
+bb661:		; preds = %bb554, %bb159
+	%d.12 = select i1 false, i8* null, i8* null		; <i8*> [#uses=1]; both select arms are null; feeds the %d.0 phi through bb664
+	br label %bb664
+
+bb664:		; preds = %bb664, %bb661
+	br i1 false, label %bb159, label %bb664
+}

Added: llvm/trunk/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,100 @@
+; RUN: opt < %s -newgvn | llvm-dis
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+	%struct..4sPragmaType = type { i8*, i32 }
+	%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
+	%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
+	%struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
+	%struct.AuxData = type { i8*, void (i8*)* }
+	%struct.Bitvec = type { i32, i32, i32, { [125 x i32] } }
+	%struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
+	%struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
+	%struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
+	%struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
+	%struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
+	%struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
+	%struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
+	%struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
+	%struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
+	%struct.Context = type { i64, i32, %struct.Fifo }
+	%struct.CountCtx = type { i64 }
+	%struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
+	%struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
+	%struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..4sPragmaType, %struct..4sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
+	%struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
+	%struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
+	%struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
+	%struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
+	%struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
+	%struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
+	%struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
+	%struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
+	%struct.IdList = type { %struct..4sPragmaType*, i32, i32 }
+	%struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
+	%struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
+	%struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
+	%struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.PgHdr*, i32, %struct.MemPage* }
+	%struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
+	%struct.Op = type { i8, i8, i8, i8, i32, i32, i32, { i32 } }
+	%struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.PgHdr*, %struct.PgHdr*, %struct.PgHdr*, i64, i64, i64, i64, i64, i32, void (%struct.PgHdr*, i32)*, void (%struct.PgHdr*, i32)*, i32, %struct.PgHdr**, i8*, [16 x i8] }
+	%struct.PagerLruLink = type { %struct.PgHdr*, %struct.PgHdr* }
+	%struct.PagerLruList = type { %struct.PgHdr*, %struct.PgHdr*, %struct.PgHdr* }
+	%struct.Parse = type { %struct.sqlite3*, i32, i8*, %struct.Vdbe*, i8, i8, i8, i8, i8, i8, i8, [8 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x i32], i32, %struct.TableLock*, i32, i32, i32, i32, i32, %struct.Expr**, i8, %struct..4sPragmaType, %struct..4sPragmaType, %struct..4sPragmaType, i8*, i8*, %struct.Table*, %struct.Trigger*, %struct.TriggerStack*, i8*, %struct..4sPragmaType, i8, %struct.Table*, i32 }
+	%struct.PgHdr = type { %struct.Pager*, i32, %struct.PgHdr*, %struct.PgHdr*, %struct.PagerLruLink, %struct.PgHdr*, i8, i8, i8, i8, i8, i16, %struct.PgHdr*, %struct.PgHdr*, i8* }
+	%struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
+	%struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
+	%struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
+	%struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
+	%struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
+	%struct.TableLock = type { i32, i32, i8, i8* }
+	%struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..4sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
+	%struct.TriggerStack = type { %struct.Table*, i32, i32, i32, i32, i32, i32, %struct.Trigger*, %struct.TriggerStack* }
+	%struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..4sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
+	%struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
+	%struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
+	%struct._OvflCell = type { i8*, i16 }
+	%struct._ht = type { i32, %struct.HashElem* }
+	%struct.anon = type { double }
+	%struct.sColMap = type { i32, i8* }
+	%struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %struct.anon, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
+	%struct.sqlite3InitInfo = type { i32, i32, i8 }
+	%struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
+	%struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
+	%struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
+	%struct.sqlite3_index_constraint_usage = type { i32, i8 }
+	%struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
+	%struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
+	%struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
+	%struct.sqlite3_mutex = type opaque
+	%struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
+	%struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
+	%struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
+
+define fastcc void @sqlite3Insert(%struct.Parse* %pParse, %struct.SrcList* %pTabList, %struct.ExprList* %pList, %struct.Select* %pSelect, %struct.IdList* %pColumn, i32 %onError) nounwind {		; only %pColumn is referenced in the body; there is no ret, the sole exit is the unreachable in bb67
+entry:
+	br i1 false, label %bb54, label %bb69.loopexit
+
+bb54:		; preds = %entry
+	br label %bb69.loopexit
+
+bb59:		; preds = %bb63.preheader
+	%0 = load %struct..4sPragmaType*, %struct..4sPragmaType** %3, align 4		; <%struct..4sPragmaType*> [#uses=0]; %3 is defined further down in bb69.loopexit, which every path from entry passes through before reaching this block
+	br label %bb65
+
+bb65:		; preds = %bb63.preheader, %bb59
+	%1 = load %struct..4sPragmaType*, %struct..4sPragmaType** %4, align 4		; <%struct..4sPragmaType*> [#uses=0]; %4 has the same GEP operands as %3, so this reads the same field as %0
+	br i1 false, label %bb67, label %bb63.preheader
+
+bb67:		; preds = %bb65
+	%2 = getelementptr %struct.IdList, %struct.IdList* %pColumn, i32 0, i32 0		; <%struct..4sPragmaType**> [#uses=0]; third copy of the same GEP, unused
+	unreachable
+
+bb69.loopexit:		; preds = %bb54, %entry
+	%3 = getelementptr %struct.IdList, %struct.IdList* %pColumn, i32 0, i32 0		; <%struct..4sPragmaType**> [#uses=1]; address of field 0 of %pColumn, used by the load in bb59
+	%4 = getelementptr %struct.IdList, %struct.IdList* %pColumn, i32 0, i32 0		; <%struct..4sPragmaType**> [#uses=1]; identical to %3, used by the load in bb65
+	br label %bb63.preheader
+
+bb63.preheader:		; preds = %bb69.loopexit, %bb65
+	br i1 false, label %bb59, label %bb65
+}

Added: llvm/trunk/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,110 @@
+; RUN: opt < %s -newgvn -disable-output
+; PR3775
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+	%llvm.dbg.anchor.type = type { i32, i32 }
+	%"struct.__gnu_cxx::hash<void*>" = type <{ i8 }>
+	%struct.__sched_param = type { i32 }
+	%struct._pthread_descr_struct = type opaque
+	%struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, i8*, i32 }
+	%struct.pthread_mutex_t = type { i32, i32, %struct._pthread_descr_struct*, i32, %llvm.dbg.anchor.type }
+	%"struct.std::_Rb_tree<void*,std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<void*>,std::allocator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >" = type { %"struct.std::_Rb_tree<void*,std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<void*>,std::allocator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >::_Rb_tree_impl<std::less<void*>,false>" }
+	%"struct.std::_Rb_tree<void*,std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<void*>,std::allocator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >::_Rb_tree_impl<std::less<void*>,false>" = type { %"struct.__gnu_cxx::hash<void*>", %"struct.std::_Rb_tree_node_base", i32 }
+	%"struct.std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >" = type { %"struct.std::_Rb_tree_node_base"* }
+	%"struct.std::_Rb_tree_node_base" = type { i32, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }
+	%"struct.std::pair<std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,bool>" = type { %"struct.std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >", i8 }
+	%"struct.std::pair<void* const,void*>" = type { i8*, i8* }
+
+@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*), i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
+@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32), i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
+@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*), i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = weak alias i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*), i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i32), i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.__sched_param*), i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*), i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32), i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_init		; <i32 (%struct.__sched_param*)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.__sched_param*, i32), i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype		; <i32 (%struct.__sched_param*, i32)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy		; <i32 (%struct.__sched_param*)*> [#uses=0]
+
+declare fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind readnone		; void, zero-argument, readnone callee; called from bb5 and bb11 below
+
+define fastcc void @_ZNSt8_Rb_treeIPvSt4pairIKS0_S0_ESt10_Select1stIS3_ESt4lessIS0_ESaIS3_EE16_M_insert_uniqueERKS3_(%"struct.std::pair<std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,bool>"* noalias nocapture sret %agg.result, %"struct.std::_Rb_tree<void*,std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > >,std::_Select1st<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,std::less<void*>,std::allocator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > > >"* %this, %"struct.std::pair<void* const,void*>"* %__v) nounwind {
+entry:		; the body never references %agg.result, %this or %__v; it consists only of branches and two void calls
+	br i1 false, label %bb7, label %bb
+
+bb:		; preds = %bb, %entry
+	br i1 false, label %bb5, label %bb		; constant-false condition, so the selected successor is the self-loop
+
+bb5:		; preds = %bb
+	call fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind		; first of two identical void calls; this one is on only one of the two paths into bb11
+	br i1 false, label %bb11, label %bb7
+
+bb7:		; preds = %bb5, %entry
+	br label %bb11
+
+bb11:		; preds = %bb7, %bb5
+	call fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind		; same void call again at the join point; produces no value
+	unreachable
+}
+
+define i32 @pthread_once(i32*, void ()*) {		; stub aliasee for the weak alias _ZL20__gthrw_pthread_oncePiPFvvE
+	ret i32 0
+}
+
+define i8* @pthread_getspecific(i32) {		; stub aliasee for the weak alias _ZL27__gthrw_pthread_getspecificj
+	ret i8* null
+}
+
+define i32 @pthread_setspecific(i32, i8*) {		; stub aliasee for the weak alias _ZL27__gthrw_pthread_setspecificjPKv
+	ret i32 0
+}
+
+define i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) {		; stub aliasee for the weak alias _ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_
+	ret i32 0
+}
+
+define i32 @pthread_cancel(i32) {		; stub aliasee for the weak alias _ZL22__gthrw_pthread_cancelm
+	ret i32 0
+}
+
+define i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) {		; stub aliasee for the weak alias _ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t
+	ret i32 0
+}
+
+define i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) {		; stub aliasee for the weak alias _ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t
+	ret i32 0
+}
+
+define i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) {		; stub aliasee for the weak alias _ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t
+	ret i32 0
+}
+
+define i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.__sched_param*) {		; stub aliasee for the weak alias _ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t
+	ret i32 0
+}
+
+define i32 @pthread_key_create(i32*, void (i8*)*) {		; stub aliasee for the weak alias _ZL26__gthrw_pthread_key_createPjPFvPvE
+	ret i32 0
+}
+
+define i32 @pthread_key_delete(i32) {		; stub aliasee for the weak alias _ZL26__gthrw_pthread_key_deletej
+	ret i32 0
+}
+
+define i32 @pthread_mutexattr_init(%struct.__sched_param*) {		; stub aliasee for the weak alias _ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t
+	ret i32 0
+}
+
+define i32 @pthread_mutexattr_settype(%struct.__sched_param*, i32) {		; stub aliasee for the weak alias _ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti
+	ret i32 0
+}
+
+define i32 @pthread_mutexattr_destroy(%struct.__sched_param*) {		; stub aliasee for the weak alias _ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t
+	ret i32 0
+}

Added: llvm/trunk/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt < %s -newgvn | llvm-dis
+; PR4256
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+	%llvm.dbg.anchor.type = type { i32, i32 }
+	%struct.cset = type { i8*, i8, i8, i32, i8* }
+	%struct.lmat = type { %struct.re_guts*, i32, %llvm.dbg.anchor.type*, i8*, i8*, i8*, i8*, i8**, i32, i8*, i8*, i8*, i8*, i8* }
+	%struct.re_guts = type { i32*, %struct.cset*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, [1 x i8] }
+
+define i8* @lbackref(%struct.lmat* %m, i8* %start, i8* %stop, i32 %startst, i32 %stopst, i32 %lev, i32 %rec) nounwind {		; every ret returns null; %startst, %lev and %rec are never read
+entry:
+	br label %bb63
+
+bb:		; preds = %bb63
+	switch i32 0, label %bb62 [
+		i32 268435456, label %bb2
+		i32 805306368, label %bb9
+		i32 -1610612736, label %bb51
+	]
+
+bb2:		; preds = %bb
+	br label %bb62
+
+bb9:		; preds = %bb
+	%0 = load i8, i8* %sp.1, align 1		; <i8> [#uses=0]; %sp.1 is the phi in bb63 further down, the only predecessor chain into this block
+	br label %bb62
+
+bb51:		; preds = %bb
+	%1 = load i8, i8* %sp.1, align 1		; <i8> [#uses=0]; same address as the load in bb9, on a sibling switch arm
+	ret i8* null
+
+bb62:		; preds = %bb9, %bb2, %bb
+	br label %bb63
+
+bb63:		; preds = %bb84, %bb69, %bb62, %entry
+	%sp.1 = phi i8* [ null, %bb62 ], [ %sp.1.lcssa, %bb84 ], [ %start, %entry ], [ %sp.1.lcssa, %bb69 ]		; <i8*> [#uses=3]; %start on entry, null after bb62, and its own value (through %sp.1.lcssa) on the bb69/bb84 back edges
+	br i1 false, label %bb, label %bb65
+
+bb65:		; preds = %bb63
+	%sp.1.lcssa = phi i8* [ %sp.1, %bb63 ]		; <i8*> [#uses=4]; single-input phi that just forwards %sp.1
+	br i1 false, label %bb66, label %bb69
+
+bb66:		; preds = %bb65
+	ret i8* null
+
+bb69:		; preds = %bb65
+	switch i32 0, label %bb108.loopexit2.loopexit.loopexit [
+		i32 1342177280, label %bb63
+		i32 1476395008, label %bb84
+		i32 1879048192, label %bb104
+		i32 2013265920, label %bb93
+	]
+
+bb84:		; preds = %bb69
+	%2 = tail call i8* @lbackref(%struct.lmat* %m, i8* %sp.1.lcssa, i8* %stop, i32 0, i32 %stopst, i32 0, i32 0) nounwind		; <i8*> [#uses=0]; self-recursive call whose result is discarded
+	br label %bb63
+
+bb93:		; preds = %bb69
+	ret i8* null
+
+bb104:		; preds = %bb69
+	%sp.1.lcssa.lcssa33 = phi i8* [ %sp.1.lcssa, %bb69 ]		; <i8*> [#uses=0]; second single-input phi in the chain, unused
+	unreachable
+
+bb108.loopexit2.loopexit.loopexit:		; preds = %bb69
+	ret i8* null
+}

Added: llvm/trunk/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; Test to make sure malloc's bitcast does not block detection of a store 
+; to aliased memory; GVN should not optimize away the load in this program.
+; RUN: opt < %s -newgvn -S | FileCheck %s
+
+define i64 @test() {
+  %1 = tail call i8* @malloc(i64 mul (i64 4, i64 ptrtoint (i64* getelementptr (i64, i64* null, i64 1) to i64))) ; <i8*> [#uses=2]; size is 4 * sizeof(i64), spelled with a GEP-from-null constant expression
+  store i8 42, i8* %1		; one-byte store through the un-cast malloc result
+  %X = bitcast i8* %1 to i64*                     ; <i64*> [#uses=1]; same address as %1, viewed as i64*
+  %Y = load i64, i64* %X                               ; <i64> [#uses=1]; i64 read starting at the byte stored above, so it must remain in the output
+  ret i64 %Y
+; CHECK: %Y = load i64, i64* %X
+; CHECK: ret i64 %Y
+}
+
+declare noalias i8* @malloc(i64)		; noalias return value

Added: llvm/trunk/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+; CHECK-NOT: load
+; CHECK-NOT: phi
+
+define i8* @cat(i8* %s1, ...) nounwind {		; variadic; %s1 is never read and every branch condition, address and returned value is undef
+entry:
+  br i1 undef, label %bb, label %bb3
+
+bb:                                               ; preds = %entry
+  unreachable
+
+bb3:                                              ; preds = %entry
+  store i8* undef, i8** undef, align 4		; the only store in the function
+  br i1 undef, label %bb5, label %bb6
+
+bb5:                                              ; preds = %bb3
+  unreachable
+
+bb6:                                              ; preds = %bb3
+  br label %bb12
+
+bb8:                                              ; preds = %bb12
+  br i1 undef, label %bb9, label %bb10
+
+bb9:                                              ; preds = %bb8
+  %0 = load i8*, i8** undef, align 4                   ; <i8*> [#uses=0]; unused; the file's negative FileCheck patterns require no load to remain in the output
+  %1 = load i8*, i8** undef, align 4                   ; <i8*> [#uses=0]; identical to %0 and also unused
+  br label %bb11
+
+bb10:                                             ; preds = %bb8
+  br label %bb11
+
+bb11:                                             ; preds = %bb10, %bb9
+  br label %bb12
+
+bb12:                                             ; preds = %bb11, %bb6
+  br i1 undef, label %bb8, label %bb13		; loop header: bb8 -> (bb9 | bb10) -> bb11 -> bb12
+
+bb13:                                             ; preds = %bb12
+  ret i8* undef
+}

Added: llvm/trunk/test/Transforms/NewGVN/2010-05-08-OneBit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2010-05-08-OneBit.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2010-05-08-OneBit.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2010-05-08-OneBit.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt < %s -newgvn
+; PR7052
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) personality i32 (...)* @__gxx_personality_v0 {		; %argc and %argv are never read
+entry:
+  %0 = getelementptr inbounds i8, i8* undef, i64 5    ; <i8*> [#uses=1]
+  %1 = bitcast i8* %0 to i32*                     ; <i32*> [#uses=1]
+  store i32 undef, i32* %1, align 1		; i32 store at byte offset 5 from undef
+  br i1 undef, label %k121.i.i, label %l117.i.i
+
+l117.i.i:                                         ; preds = %entry
+  invoke fastcc void @foo()
+          to label %.noexc5 unwind label %landing_pad
+
+.noexc5:                                          ; preds = %l117.i.i
+  unreachable
+
+k121.i.i:                                         ; preds = %entry
+  br i1 undef, label %l129.i.i, label %k133.i.i
+
+l129.i.i:                                         ; preds = %k121.i.i
+  invoke fastcc void @foo()
+          to label %.noexc7 unwind label %landing_pad
+
+.noexc7:                                          ; preds = %l129.i.i
+  unreachable
+
+k133.i.i:                                         ; preds = %k121.i.i
+  %2 = getelementptr i8, i8* undef, i64 5             ; <i8*> [#uses=1]; same base and offset as %0, without inbounds
+  %3 = bitcast i8* %2 to i1*                      ; <i1*> [#uses=1]
+  %4 = load i1, i1* %3                                ; <i1> [#uses=1]; one-bit load at the offset where entry stored an i32
+  br i1 %4, label %k151.i.i, label %l147.i.i
+
+l147.i.i:                                         ; preds = %k133.i.i
+  invoke fastcc void @foo()
+          to label %.noexc10 unwind label %landing_pad
+
+.noexc10:                                         ; preds = %l147.i.i
+  unreachable
+
+k151.i.i:                                         ; preds = %k133.i.i
+  ret i32 0
+
+landing_pad:                                      ; preds = %l147.i.i, %l129.i.i, %l117.i.i
+  %exn = landingpad {i8*, i32}
+            cleanup
+  switch i32 undef, label %fin [
+    i32 1, label %catch1
+    i32 2, label %catch
+  ]
+
+fin:                                              ; preds = %landing_pad
+  unreachable
+
+catch:                                            ; preds = %landing_pad
+  ret i32 1
+
+catch1:                                           ; preds = %landing_pad
+  ret i32 2
+}
+
+declare fastcc void @foo()		; invoked from l117.i.i, l129.i.i and l147.i.i
+
+declare i32 @__gxx_personality_v0(...)		; personality routine named on @main

Added: llvm/trunk/test/Transforms/NewGVN/2010-11-13-Simplify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2010-11-13-Simplify.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2010-11-13-Simplify.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2010-11-13-Simplify.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+declare i32 @foo(i32) readnone
+
+define i1 @bar() {  ; two chains of readnone calls that should be proven equal
+; CHECK-LABEL: @bar(
+  %a = call i32 @foo (i32 0) readnone  ; first readnone call with argument 0
+  %b = call i32 @foo (i32 0) readnone  ; same callee and argument as %a
+  %c = and i32 %a, %b  ; equals %a once %a and %b are recognised as the same value
+  %x = call i32 @foo (i32 %a) readnone
+  %y = call i32 @foo (i32 %c) readnone  ; same call as %x if %c is simplified to %a
+  %z = icmp eq i32 %x, %y  ; the check line below expects this to fold to true
+  ret i1 %z
+; CHECK: ret i1 true
+} 

Added: llvm/trunk/test/Transforms/NewGVN/2011-04-27-phioperands.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2011-04-27-phioperands.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2011-04-27-phioperands.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2011-04-27-phioperands.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,106 @@
+; RUN: opt -newgvn -disable-output < %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+
+@nuls = external global [10 x i8]  ; external 10-byte array; its address is stored in block "<L98>.i"
+
+define fastcc void @p_ere() nounwind {
+entry:
+  br label %"<bb 5>"
+
+"<L18>.i":
+  br i1 undef, label %"<bb 3>.i30.i", label %doemit.exit51.i
+
+"<bb 3>.i30.i":
+  unreachable
+
+doemit.exit51.i:
+  br label %"<bb 53>.i"
+
+"<L19>.i":
+  br i1 undef, label %"<bb 3>.i55.i", label %doemit.exit76.i
+
+"<bb 3>.i55.i":
+  unreachable
+
+doemit.exit76.i:
+  br label %"<bb 53>.i"
+
+"<L98>.i":
+  store i8* getelementptr inbounds ([10 x i8], [10 x i8]* @nuls, i64 0, i64 0), i8** undef, align 8
+  br label %"<bb 53>.i"
+
+"<L99>.i":
+  br label %"<bb 53>.i"
+
+"<L24>.i":
+  br i1 undef, label %"<bb 53>.i", label %"<bb 35>.i"
+
+"<bb 35>.i":
+  br label %"<bb 53>.i"
+
+"<L28>.i":
+  br label %"<bb 53>.i"
+
+"<L29>.i":
+  br label %"<bb 53>.i"
+
+"<L39>.i":
+  br label %"<bb 53>.i"
+
+"<bb 53>.i":
+  %wascaret_2.i = phi i32 [ 0, %"<L39>.i" ], [ 0, %"<L29>.i" ], [ 0, %"<L28>.i" ], [ 0, %"<bb 35>.i" ], [ 0, %"<L99>.i" ], [ 0, %"<L98>.i" ], [ 0, %doemit.exit76.i ], [ 1, %doemit.exit51.i ], [ 0, %"<L24>.i" ]
+  %D.5496_84.i = load i8*, i8** undef, align 8
+  br i1 undef, label %"<bb 54>.i", label %"<bb 5>"
+
+"<bb 54>.i":
+  br i1 undef, label %"<bb 5>", label %"<bb 58>.i"
+
+"<bb 58>.i":
+  br i1 undef, label %"<bb 64>.i", label %"<bb 59>.i"
+
+"<bb 59>.i":
+  br label %"<bb 64>.i"
+
+"<bb 64>.i":
+  switch i32 undef, label %"<bb 5>" [
+    i32 42, label %"<L54>.i"
+    i32 43, label %"<L55>.i"
+    i32 63, label %"<L56>.i"
+    i32 123, label %"<bb 5>.i258.i"
+  ]
+
+"<L54>.i":
+  br i1 undef, label %"<bb 3>.i105.i", label %doemit.exit127.i
+
+"<bb 3>.i105.i":
+  unreachable
+
+doemit.exit127.i:
+  unreachable
+
+"<L55>.i":
+  br i1 undef, label %"<bb 3>.i157.i", label %"<bb 5>"
+
+"<bb 3>.i157.i":
+  unreachable
+
+"<L56>.i":
+  br label %"<bb 5>"
+
+"<bb 5>.i258.i":
+  unreachable
+
+"<bb 5>":
+  switch i32 undef, label %"<L39>.i" [
+    i32 36, label %"<L19>.i"
+    i32 94, label %"<L18>.i"
+    i32 124, label %"<L98>.i"
+    i32 42, label %"<L99>.i"
+    i32 43, label %"<L99>.i"
+    i32 46, label %"<L24>.i"
+    i32 63, label %"<L99>.i"
+    i32 91, label %"<L28>.i"
+    i32 92, label %"<L29>.i"
+  ]
+}

Added: llvm/trunk/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,91 @@
+; RUN: opt < %s -newgvn -S | FileCheck %s
+;
+
+%0 = type { i64, i1 }
+
+define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %uadd = tail call %0 @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+  %uadd.0 = extractvalue %0 %uadd, 0
+  %add1 = add i64 %a, %b
+  %add2 =  add i64 %add1, %uadd.0
+  ret i64 %add2
+}
+
+; CHECK-LABEL: @test1(
+; CHECK-NOT: add1
+; CHECK: ret
+
+define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %usub = tail call %0 @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+  %usub.0 = extractvalue %0 %usub, 0
+  %sub1 = sub i64 %a, %b
+  %add2 =  add i64 %sub1, %usub.0
+  ret i64 %add2
+}
+
+; CHECK-LABEL: @test2(
+; CHECK-NOT: sub1
+; CHECK: ret
+
+define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %umul = tail call %0 @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
+  %umul.0 = extractvalue %0 %umul, 0
+  %mul1 = mul i64 %a, %b
+  %add2 =  add i64 %mul1, %umul.0
+  ret i64 %add2
+}
+
+; CHECK-LABEL: @test3(
+; CHECK-NOT: mul1
+; CHECK: ret
+
+define i64 @test4(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %sadd = tail call %0 @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
+  %sadd.0 = extractvalue %0 %sadd, 0
+  %add1 = add i64 %a, %b
+  %add2 =  add i64 %add1, %sadd.0
+  ret i64 %add2
+}
+
+; CHECK-LABEL: @test4(
+; CHECK-NOT: add1
+; CHECK: ret
+
+define i64 @test5(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %ssub = tail call %0 @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
+  %ssub.0 = extractvalue %0 %ssub, 0
+  %sub1 = sub i64 %a, %b
+  %add2 =  add i64 %sub1, %ssub.0
+  ret i64 %add2
+}
+
+; CHECK-LABEL: @test5(
+; CHECK-NOT: sub1
+; CHECK: ret
+
+define i64 @test6(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %smul = tail call %0 @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
+  %smul.0 = extractvalue %0 %smul, 0
+  %mul1 = mul i64 %a, %b
+  %add2 =  add i64 %mul1, %smul.0
+  ret i64 %add2
+}
+
+; CHECK-LABEL: @test6(
+; CHECK-NOT: mul1
+; CHECK: ret
+
+declare void @exit(i32) noreturn
+declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+

Added: llvm/trunk/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,81 @@
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+%struct.__fundamental_type_info_pseudo = type { %struct.__type_info_pseudo }
+%struct.__type_info_pseudo = type { i8*, i8* }
+
+@_ZTIi = external constant %struct.__fundamental_type_info_pseudo  ; Itanium-mangled typeinfo for int; named in the landingpad catch clauses
+@_ZTIb = external constant %struct.__fundamental_type_info_pseudo  ; Itanium-mangled typeinfo for bool; named in the landingpad catch clauses
+
+declare void @_Z4barv()
+
+declare void @_Z7cleanupv()
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readonly
+
+declare i8* @__cxa_begin_catch(i8*) nounwind
+
+declare void @__cxa_end_catch()
+
+declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*)
+
+define void @_Z3foov() uwtable personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 {
+entry:
+  invoke void @_Z4barv()
+          to label %return unwind label %lpad
+
+lpad:                                             ; preds = %entry
+  %0 = landingpad { i8*, i32 }
+          catch %struct.__fundamental_type_info_pseudo* @_ZTIi
+          catch %struct.__fundamental_type_info_pseudo* @_ZTIb
+          catch %struct.__fundamental_type_info_pseudo* @_ZTIi
+          catch %struct.__fundamental_type_info_pseudo* @_ZTIb
+  %exc_ptr2.i = extractvalue { i8*, i32 } %0, 0
+  %filter3.i = extractvalue { i8*, i32 } %0, 1
+  %typeid.i = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*))
+; CHECK: call i32 @llvm.eh.typeid.for
+  %1 = icmp eq i32 %filter3.i, %typeid.i
+  br i1 %1, label %ppad, label %next
+
+next:                                             ; preds = %lpad
+  %typeid1.i = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIb to i8*))
+; CHECK: call i32 @llvm.eh.typeid.for
+  %2 = icmp eq i32 %filter3.i, %typeid1.i
+  br i1 %2, label %ppad2, label %next2
+
+ppad:                                             ; preds = %lpad
+  %3 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind
+  tail call void @__cxa_end_catch() nounwind
+  br label %return
+
+ppad2:                                            ; preds = %next
+  %D.2073_5.i = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind
+  tail call void @__cxa_end_catch() nounwind
+  br label %return
+
+next2:                                            ; preds = %next
+  call void @_Z7cleanupv()
+  %typeid = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*))
+; CHECK-NOT: call i32 @llvm.eh.typeid.for
+  %4 = icmp eq i32 %filter3.i, %typeid
+  br i1 %4, label %ppad3, label %next3
+
+next3:                                            ; preds = %next2
+  %typeid1 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIb to i8*))
+  %5 = icmp eq i32 %filter3.i, %typeid1
+  br i1 %5, label %ppad4, label %unwind
+
+unwind:                                           ; preds = %next3
+  resume { i8*, i32 } %0
+
+ppad3:                                            ; preds = %next2
+  %6 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind
+  tail call void @__cxa_end_catch() nounwind
+  br label %return
+
+ppad4:                                            ; preds = %next3
+  %D.2080_5 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind
+  tail call void @__cxa_end_catch() nounwind
+  br label %return
+
+return:                                           ; preds = %ppad4, %ppad3, %ppad2, %ppad, %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/NewGVN/2012-05-22-PreCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2012-05-22-PreCrash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2012-05-22-PreCrash.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2012-05-22-PreCrash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,33 @@
+; RUN: opt < %s -newgvn
+; PR12858
+
+define void @fn5(i16 signext %p1, i8 signext %p2) nounwind uwtable {
+entry:
+  br i1 undef, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %conv = sext i16 %p1 to i32
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %conv1 = sext i16 %p1 to i32
+  br i1 undef, label %if.then3, label %if.else4
+
+if.then3:                                         ; preds = %if.end
+  br label %if.end12
+
+if.else4:                                         ; preds = %if.end
+  %conv7 = sext i8 %p2 to i32
+  %cmp8 = icmp eq i32 %conv1, %conv7
+  br i1 %cmp8, label %if.then10, label %if.end12
+
+if.then10:                                        ; preds = %if.else4
+  br label %if.end12
+
+if.end12:                                         ; preds = %if.then10, %if.else4, %if.then3
+  %conv13 = sext i8 %p2 to i32
+  ret void
+}

Added: llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,43 @@
+; XFAIL: *
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
+
+; This test ensures that masked scatter and gather operations, which take vectors of pointers,
+; do not have pointer aliasing ignored when being processed.
+; No scatter/gather calls should end up eliminated
+; CHECK: llvm.masked.gather
+; CHECK: llvm.masked.gather
+; CHECK: llvm.masked.scatter
+; CHECK: llvm.masked.gather
+; CHECK: llvm.masked.scatter
+; CHECK: llvm.masked.gather
+define spir_kernel void @test(<2 x i32*> %in1, <2 x i32*> %in2, i32* %out) {
+entry:
+  ; Just some temporary storage
+  %tmp.0 = alloca i32
+  %tmp.1 = alloca i32
+  %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0
+  %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
+  ; Read from in1 and in2
+  %in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  ; Store in1 to the allocas
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  ; Read in1 from the allocas
+  ; This gather should alias the scatter we just saw
+  %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  ; Store in2 to the allocas
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  ; Read in2 from the allocas
+  ; This gather should alias the scatter we just saw, and not be eliminated
+  %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  ; Store in2 to out for good measure
+  %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
+  %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1
+  store i32 %tmp.v.1.0, i32* %out
+  %out.1 = getelementptr i32, i32* %out, i32 1
+  store i32 %tmp.v.1.1, i32* %out.1
+  ret void
+}

Added: llvm/trunk/test/Transforms/NewGVN/MemdepMiscompile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/MemdepMiscompile.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/MemdepMiscompile.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/MemdepMiscompile.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; rdar://12801584
+; Value of %shouldExit can be changed by RunInMode.
+; Make sure we do not replace load %shouldExit in while.cond.backedge
+; with a phi node where the value from while.body is 0.
+define i32 @test() nounwind ssp {
+entry:
+; CHECK: test()
+; CHECK: while.body:
+; CHECK: call void @RunInMode
+; CHECK: br i1 %tobool, label %while.cond.backedge, label %if.then
+; CHECK: while.cond.backedge:
+; CHECK: load i32, i32* %shouldExit
+; CHECK: br i1 %cmp, label %while.body
+  %shouldExit = alloca i32, align 4
+  %tasksIdle = alloca i32, align 4
+  store i32 0, i32* %shouldExit, align 4
+  store i32 0, i32* %tasksIdle, align 4
+  call void @CTestInitialize(i32* %tasksIdle) nounwind
+  %0 = load i32, i32* %shouldExit, align 4
+  %cmp1 = icmp eq i32 %0, 0
+  br i1 %cmp1, label %while.body.lr.ph, label %while.end
+
+while.body.lr.ph:
+  br label %while.body
+
+while.body:
+  call void @RunInMode(i32 100) nounwind
+  %1 = load i32, i32* %tasksIdle, align 4
+  %tobool = icmp eq i32 %1, 0
+  br i1 %tobool, label %while.cond.backedge, label %if.then
+
+if.then:
+  store i32 0, i32* %tasksIdle, align 4
+  call void @TimerCreate(i32* %shouldExit) nounwind
+  br label %while.cond.backedge
+
+while.cond.backedge:
+  %2 = load i32, i32* %shouldExit, align 4
+  %cmp = icmp eq i32 %2, 0
+  br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge:
+  br label %while.end
+
+while.end:
+  ret i32 0
+}
+declare void @CTestInitialize(i32*)
+declare void @RunInMode(i32)
+declare void @TimerCreate(i32*)

Added: llvm/trunk/test/Transforms/NewGVN/assume-equal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/assume-equal.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/assume-equal.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/assume-equal.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,274 @@
+; XFAIL: *
+; RUN: opt < %s -newgvn -S | FileCheck %s
+
+%struct.A = type { i32 (...)** }
+@_ZTV1A = available_externally unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)], align 8  ; slot 2 holds foo, slot 3 holds bar
+@_ZTI1A = external constant i8*  ; referenced from slot 1 of @_ZTV1A
+
+; Checks if indirect calls can be replaced with direct
+; assuming that %vtable == @_ZTV1A (with alignment).
+; Checking const propagation across other BBs
+; CHECK-LABEL: define void @_Z1gb(
+
+define void @_Z1gb(i1 zeroext %p) {
+entry:
+  %call = tail call noalias i8* @_Znwm(i64 8) #4
+  %0 = bitcast i8* %call to %struct.A*
+  tail call void @_ZN1AC1Ev(%struct.A* %0) #1
+  %1 = bitcast i8* %call to i8***
+  %vtable = load i8**, i8*** %1, align 8
+  %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+  tail call void @llvm.assume(i1 %cmp.vtables)
+  br i1 %p, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+  %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+  
+  ; CHECK: call i32 @_ZN1A3fooEv(
+  %call2 = tail call i32 %2(%struct.A* %0) #1
+  
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1
+  %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)**
+  
+  ; CHECK: call i32 @_ZN1A3barEv(
+  %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8
+  
+  %call5 = tail call i32 %3(%struct.A* %0) #1
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+; Check integration with invariant.group handling
+; CHECK-LABEL: define void @invariantGroupHandling(i1 zeroext %p) {
+define void @invariantGroupHandling(i1 zeroext %p) {
+entry:
+  %call = tail call noalias i8* @_Znwm(i64 8) #4
+  %0 = bitcast i8* %call to %struct.A*
+  tail call void @_ZN1AC1Ev(%struct.A* %0) #1
+  %1 = bitcast i8* %call to i8***
+  %vtable = load i8**, i8*** %1, align 8, !invariant.group !0
+  %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+  tail call void @llvm.assume(i1 %cmp.vtables)
+  br i1 %p, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+  %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+  
+; CHECK: call i32 @_ZN1A3fooEv(
+  %call2 = tail call i32 %2(%struct.A* %0) #1
+  %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0
+  %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)**
+  %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8
+; CHECK: call i32 @_ZN1A3fooEv(
+  %callx = tail call i32 %call1(%struct.A* %0) #1
+  
+  %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0
+  %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)**
+  %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8
+; CHECK: call i32 @_ZN1A3fooEv(
+  %cally = tail call i32 %call4(%struct.A* %0) #1
+  
+  %b = bitcast i8* %call to %struct.A**
+  %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0
+  %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)**
+  %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8
+; CHECK: call i32 @_ZN1A3fooEv(
+  %unknown = tail call i32 %vfun(%struct.A* %0) #1
+  
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1
+  %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)**
+  
+  ; CHECK: call i32 @_ZN1A3barEv(
+  %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8
+  
+  %call5 = tail call i32 %3(%struct.A* %0) #1
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+
+; Checking const propagation in the same BB
+; CHECK-LABEL: define i32 @main()
+
+define i32 @main() {
+entry:
+  %call = tail call noalias i8* @_Znwm(i64 8) 
+  %0 = bitcast i8* %call to %struct.A*
+  tail call void @_ZN1AC1Ev(%struct.A* %0) 
+  %1 = bitcast i8* %call to i8***
+  %vtable = load i8**, i8*** %1, align 8
+  %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+  tail call void @llvm.assume(i1 %cmp.vtables)
+  %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+  
+  ; CHECK: call i32 @_ZN1A3fooEv(
+  %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+  
+  %call2 = tail call i32 %2(%struct.A* %0)
+  ret i32 0
+}
+
+; This test checks const propagation with the fcmp instruction.
+; CHECK-LABEL: define float @_Z1gf(float %p)
+
+define float @_Z1gf(float %p) {
+entry:
+  %p.addr = alloca float, align 4
+  %f = alloca float, align 4
+  store float %p, float* %p.addr, align 4
+  
+  store float 3.000000e+00, float* %f, align 4
+  %0 = load float, float* %p.addr, align 4
+  %1 = load float, float* %f, align 4
+  %cmp = fcmp oeq float %1, %0 ; note const on lhs
+  call void @llvm.assume(i1 %cmp)
+  
+  ; CHECK: ret float 3.000000e+00
+  ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1hf(float %p)
+
+define float @_Z1hf(float %p) {
+entry:
+  %p.addr = alloca float, align 4
+  store float %p, float* %p.addr, align 4
+  
+  %0 = load float, float* %p.addr, align 4
+  %cmp = fcmp nnan ueq float %0, 3.000000e+00
+  call void @llvm.assume(i1 %cmp)
+  
+  ; CHECK: ret float 3.000000e+00
+  ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1if(float %p)
+define float @_Z1if(float %p) {
+entry:
+  %p.addr = alloca float, align 4
+  store float %p, float* %p.addr, align 4
+  
+  %0 = load float, float* %p.addr, align 4
+  %cmp = fcmp ueq float %0, 3.000000e+00 ; no nnan flag - can't propagate
+  call void @llvm.assume(i1 %cmp)
+  
+  ; CHECK-NOT: ret float 3.000000e+00
+  ret float %0
+}
+
+; This test checks if constant propagation works for multiple node edges
+; CHECK-LABEL: define i32 @_Z1ii(i32 %p)
+define i32 @_Z1ii(i32 %p) {
+entry:
+  %cmp = icmp eq i32 %p, 42
+  call void @llvm.assume(i1 %cmp)
+  
+  ; CHECK: br i1 true, label %bb2, label %bb2
+  br i1 %cmp, label %bb2, label %bb2
+bb2:
+  call void @llvm.assume(i1 true)
+  ; CHECK: br i1 true, label %bb2, label %bb2
+  br i1 %cmp, label %bb2, label %bb2
+  
+  ; CHECK: ret i32 42
+  ret i32 %p
+}
+
+; CHECK-LABEL: define i32 @_Z1ij(i32 %p)
+define i32 @_Z1ij(i32 %p) {
+entry:
+  %cmp = icmp eq i32 %p, 42
+  call void @llvm.assume(i1 %cmp)
+  
+  ; CHECK: br i1 true, label %bb2, label %bb2
+  br i1 %cmp, label %bb2, label %bb2
+bb2:
+   ; CHECK-NOT: %cmp2 = 
+  %cmp2 = icmp eq i32 %p, 42
+  ; CHECK-NOT: call void @llvm.assume(
+  call void @llvm.assume(i1 %cmp2)
+  
+  ; CHECK: br i1 true, label %bb2, label %bb2
+  br i1 %cmp, label %bb2, label %bb2
+  
+  ; CHECK: ret i32 42
+  ret i32 %p
+}
+
+; CHECK-LABEL: define i32 @_Z1ik(i32 %p)
+define i32 @_Z1ik(i32 %p) {
+entry:
+  %cmp = icmp eq i32 %p, 42
+  call void @llvm.assume(i1 %cmp)
+  
+  ; CHECK: br i1 true, label %bb2, label %bb3
+  br i1 %cmp, label %bb2, label %bb3
+bb2:
+  ; CHECK-NOT: %cmp3 = 
+  %cmp3 = icmp eq i32 %p, 43
+  ; CHECK: store i8 undef, i8* null
+  call void @llvm.assume(i1 %cmp3)
+  ret i32 15
+bb3:
+  ret i32 17
+}
+
+; This test checks if GVN can do the constant propagation correctly
+; when there are multiple uses of the same assume value in the 
+; basic block that has a loop back-edge pointing to itself.
+;
+; CHECK-LABEL: define i32 @_Z1il(i32 %val, i1 %k)
+define i32 @_Z1il(i32 %val, i1 %k) {
+  br label %next
+
+next:
+; CHECK: tail call void @llvm.assume(i1 %k)
+; CHECK-NEXT: %cmp = icmp eq i32 %val, 50
+  tail call void @llvm.assume(i1 %k)
+  tail call void @llvm.assume(i1 %k)
+  %cmp = icmp eq i32 %val, 50
+  br i1 %cmp, label %next, label %meh
+
+meh:
+  ret i32 0 
+}
+
+; This test checks if GVN can prevent the constant propagation correctly
+; in the successor blocks that are not dominated by the basic block
+; with the assume instruction.
+;
+; CHECK-LABEL: define i1 @_z1im(i32 %val, i1 %k, i1 %j)
+define i1 @_z1im(i32 %val, i1 %k, i1 %j) {
+  br i1 %j, label %next, label %meh
+
+next:
+; CHECK: tail call void @llvm.assume(i1 %k)
+; CHECK-NEXT: br label %meh
+  tail call void @llvm.assume(i1 %k)
+  tail call void @llvm.assume(i1 %k)
+  br label %meh
+
+meh:
+; CHECK: ret i1 %k
+  ret i1 %k
+}
+
+declare noalias i8* @_Znwm(i64)
+declare void @_ZN1AC1Ev(%struct.A*)
+declare void @llvm.assume(i1)
+declare i32 @_ZN1A3fooEv(%struct.A*)
+declare i32 @_ZN1A3barEv(%struct.A*)
+
+!0 = !{!"struct A"}

Added: llvm/trunk/test/Transforms/NewGVN/assumes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/assumes.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/assumes.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/assumes.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt < %s -newgvn -S | FileCheck %s
+
+; CHECK-LABEL: @test1
+; CHECK: ret i32 %arg
+define i32 @test1(i32 %arg) {  ; assume on an inequality; the return is expected to keep using %arg
+  %cmp = icmp sge i32 %arg, 5  ; signed %arg >= 5, which does not name a single value for %arg
+  call void @llvm.assume(i1 %cmp)
+  ret i32 %arg
+}
+
+; CHECK-LABEL: @test2
+; CHECK: ret i32 %arg
+define i32 @test2(i32 %arg, i1 %b) {  ; assume of an equality against a phi inside a self-loop
+  br label %bb
+
+bb:
+  %a = phi i32 [ 1, %0 ], [ 2, %bb ]  ; 1 when entered from the unnamed entry block %0, 2 on the back edge
+  %cmp = icmp eq i32 %arg, %a
+  call void @llvm.assume(i1 %cmp)  ; %arg is assumed equal to the phi, not to one fixed constant
+  br i1 %b, label %bb, label %end
+
+end:
+  ret i32 %arg  ; the check lines above this function expect %arg to be returned unchanged
+}
+
+declare void @llvm.assume(i1 %cond)

Added: llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,315 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+;; Function Attrs: nounwind ssp uwtable
+;; We should eliminate the sub and one of the phi nodes (%m.0 and %n.0 are equivalent)
+define void @vnum_test1(i32* %data) #0 {
+; CHECK-LABEL: @vnum_test1(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[BB4:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[M_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB17:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP18:%.*]], [[BB17]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB19:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 [[TMP9]]
+; CHECK-NEXT:    store i32 2, i32* [[TMP10]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[DATA]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP15]] = add nsw i32 [[M_0]], [[TMP14]]
+; CHECK-NEXT:    br label [[BB17]]
+; CHECK:       bb17:
+; CHECK-NEXT:    [[TMP18]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb19:
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = getelementptr inbounds i32, i32* %data, i64 3
+  %tmp1 = load i32, i32* %tmp, align 4  ; data[3]: loop bound compared against %i.0
+  %tmp2 = getelementptr inbounds i32, i32* %data, i64 4
+  %tmp3 = load i32, i32* %tmp2, align 4  ; data[4]: initial value of both %m.0 and %n.0
+  br label %bb4
+
+bb4:                                              ; preds = %bb17, %bb
+  %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp15, %bb17 ]
+  %i.0 = phi i32 [ 0, %bb ], [ %tmp18, %bb17 ]
+  %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp16, %bb17 ]  ; same start value and same increment as %m.0
+  %tmp5 = icmp slt i32 %i.0, %tmp1
+  br i1 %tmp5, label %bb6, label %bb19
+
+bb6:                                              ; preds = %bb4
+  %tmp7 = getelementptr inbounds i32, i32* %data, i64 2
+  %tmp8 = load i32, i32* %tmp7, align 4
+  %tmp9 = sext i32 %tmp8 to i64
+  %tmp10 = getelementptr inbounds i32, i32* %data, i64 %tmp9
+  store i32 2, i32* %tmp10, align 4
+  %tmp11 = sub nsw i32 %m.0, %n.0  ; the sub to eliminate; the expected output stores constant 0 instead
+  %tmp12 = getelementptr inbounds i32, i32* %data, i64 0
+  store i32 %tmp11, i32* %tmp12, align 4
+  %tmp13 = getelementptr inbounds i32, i32* %data, i64 1
+  %tmp14 = load i32, i32* %tmp13, align 4
+  %tmp15 = add nsw i32 %m.0, %tmp14
+  %tmp16 = add nsw i32 %n.0, %tmp14  ; mirrors %tmp15; absent from the expected output
+  br label %bb17
+
+bb17:                                             ; preds = %bb6
+  %tmp18 = add nsw i32 %i.0, 1
+  br label %bb4
+
+bb19:                                             ; preds = %bb4
+  ret void
+}
+
+;; Function Attrs: nounwind ssp uwtable
+;; We should eliminate the sub and one of the phi nodes, prove that the store of
+;; the sub and the load of data are equivalent and that the load always produces
+;; constant 0, and delete the load, replacing it with constant 0.
+define i32 @vnum_test2(i32* %data) #0 {
+; CHECK-LABEL: @vnum_test2(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[BB4:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[M_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB19:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP20:%.*]], [[BB19]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB21:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 [[TMP9]]
+; CHECK-NEXT:    store i32 2, i32* [[TMP10]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[DATA]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP15]] = add nsw i32 [[M_0]], [[TMP14]]
+; CHECK-NEXT:    br label [[BB19]]
+; CHECK:       bb19:
+; CHECK-NEXT:    [[TMP20]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb21:
+; CHECK-NEXT:    ret i32 0
+;
+bb:
+  %tmp = getelementptr inbounds i32, i32* %data, i64 3
+  %tmp1 = load i32, i32* %tmp, align 4  ; data[3]: loop bound compared against %i.0
+  %tmp2 = getelementptr inbounds i32, i32* %data, i64 4
+  %tmp3 = load i32, i32* %tmp2, align 4  ; data[4]: initial value of both %m.0 and %n.0
+  br label %bb4
+
+bb4:                                              ; preds = %bb19, %bb
+  %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp15, %bb19 ]
+  %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp16, %bb19 ]  ; same start value and same increment as %m.0
+  %i.0 = phi i32 [ 0, %bb ], [ %tmp20, %bb19 ]
+  %p.0 = phi i32 [ undef, %bb ], [ %tmp18, %bb19 ]  ; undef on entry, otherwise the value reloaded from data[0]
+  %tmp5 = icmp slt i32 %i.0, %tmp1
+  br i1 %tmp5, label %bb6, label %bb21
+
+bb6:                                              ; preds = %bb4
+  %tmp7 = getelementptr inbounds i32, i32* %data, i64 2
+  %tmp8 = load i32, i32* %tmp7, align 4
+  %tmp9 = sext i32 %tmp8 to i64
+  %tmp10 = getelementptr inbounds i32, i32* %data, i64 %tmp9
+  store i32 2, i32* %tmp10, align 4
+  %tmp11 = sub nsw i32 %m.0, %n.0  ; the sub to eliminate; the expected output stores constant 0 instead
+  %tmp12 = getelementptr inbounds i32, i32* %data, i64 0
+  store i32 %tmp11, i32* %tmp12, align 4
+  %tmp13 = getelementptr inbounds i32, i32* %data, i64 1
+  %tmp14 = load i32, i32* %tmp13, align 4
+  %tmp15 = add nsw i32 %m.0, %tmp14
+  %tmp16 = add nsw i32 %n.0, %tmp14  ; mirrors %tmp15; absent from the expected output
+  %tmp17 = getelementptr inbounds i32, i32* %data, i64 0
+  %tmp18 = load i32, i32* %tmp17, align 4  ; reloads data[0], last written by the store of %tmp11
+  br label %bb19
+
+bb19:                                             ; preds = %bb6
+  %tmp20 = add nsw i32 %i.0, 1
+  br label %bb4
+
+bb21:                                             ; preds = %bb4
+  ret i32 %p.0  ; expected to become "ret i32 0"
+}
+
+
+;; Function Attrs: nounwind ssp uwtable
+;; Same as test 2, but with a conditional store of m-n, so it also has to discover
+;; that data ends up with the same value no matter which branch is taken.
+define i32 @vnum_test3(i32* %data) #0 {
+; CHECK-LABEL: @vnum_test3(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[BB4:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[N_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP19:%.*]], [[BB21:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP22:%.*]], [[BB21]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB23:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 5
+; CHECK-NEXT:    store i32 0, i32* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i32 [[I_0]], 30
+; CHECK-NEXT:    br i1 [[TMP10]], label [[BB11:%.*]], label [[BB14:%.*]]
+; CHECK:       bb11:
+; CHECK-NEXT:    br label [[BB14]]
+; CHECK:       bb14:
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP19]] = add nsw i32 [[N_0]], [[TMP18]]
+; CHECK-NEXT:    br label [[BB21]]
+; CHECK:       bb21:
+; CHECK-NEXT:    [[TMP22]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb23:
+; CHECK-NEXT:    ret i32 0
+;
+bb:
+  %tmp = getelementptr inbounds i32, i32* %data, i64 3
+  %tmp1 = load i32, i32* %tmp, align 4  ; data[3]: loop bound compared against %i.0
+  %tmp2 = getelementptr inbounds i32, i32* %data, i64 4
+  %tmp3 = load i32, i32* %tmp2, align 4  ; data[4]: initial value of both %n.0 and %m.0
+  br label %bb4
+
+bb4:                                              ; preds = %bb21, %bb
+  %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp20, %bb21 ]
+  %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp19, %bb21 ]  ; same start value and same increment as %n.0
+  %p.0 = phi i32 [ 0, %bb ], [ %tmp16, %bb21 ]  ; 0 on entry, otherwise the value reloaded from data[5]
+  %i.0 = phi i32 [ 0, %bb ], [ %tmp22, %bb21 ]
+  %tmp5 = icmp slt i32 %i.0, %tmp1
+  br i1 %tmp5, label %bb6, label %bb23
+
+bb6:                                              ; preds = %bb4
+  %tmp7 = getelementptr inbounds i32, i32* %data, i64 2
+  %tmp8 = load i32, i32* %tmp7, align 4  ; result is never used
+  %tmp9 = getelementptr inbounds i32, i32* %data, i64 5
+  store i32 0, i32* %tmp9, align 4  ; data[5] = 0 on every iteration
+  %tmp10 = icmp slt i32 %i.0, 30
+  br i1 %tmp10, label %bb11, label %bb14
+
+bb11:                                             ; preds = %bb6
+  %tmp12 = sub nsw i32 %m.0, %n.0
+  %tmp13 = getelementptr inbounds i32, i32* %data, i64 5
+  store i32 %tmp12, i32* %tmp13, align 4  ; conditional store of m-n to data[5]; absent from the expected output
+  br label %bb14
+
+bb14:                                             ; preds = %bb11, %bb6
+  %tmp15 = getelementptr inbounds i32, i32* %data, i64 5
+  %tmp16 = load i32, i32* %tmp15, align 4  ; reload of data[5] after the join
+  %tmp17 = getelementptr inbounds i32, i32* %data, i64 1
+  %tmp18 = load i32, i32* %tmp17, align 4
+  %tmp19 = add nsw i32 %m.0, %tmp18
+  %tmp20 = add nsw i32 %n.0, %tmp18
+  br label %bb21
+
+bb21:                                             ; preds = %bb14
+  %tmp22 = add nsw i32 %i.0, 1
+  br label %bb4
+
+bb23:                                             ; preds = %bb4
+  ret i32 %p.0  ; expected to become "ret i32 0"
+}
+
+;; This is an irreducible test case that will cause a memoryphi node loop
+;; in the two blocks.
+;; It's equivalent to something like
+;; *a = 0
+;; if (<....>) goto loopmiddle
+;; loopstart:
+;; loopmiddle:
+;; load *a
+;; *a = 0
+;; if (<....>) goto loopstart otherwise goto loopend
+;; loopend:
+;; load *a
+;; add the results of the loads
+;; return them
+;;
+;; Both loads should equal 0, but it requires being
+;; completely optimistic about MemoryPhis, otherwise
+;; we will not be able to see through the cycle.
+define i8 @irreducible_memoryphi(i8* noalias %arg, i8* noalias %arg2) {  ; %arg2 is never used in the body
+; CHECK-LABEL: @irreducible_memoryphi(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store i8 0, i8* [[ARG:%.*]]
+; CHECK-NEXT:    br i1 undef, label [[BB2:%.*]], label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i8 0
+;
+bb:
+  store i8 0, i8 *%arg  ; "*a = 0" before the cycle
+  br i1 undef, label %bb2, label %bb1  ; the cycle %bb1 <-> %bb2 can be entered at either block
+
+bb1:                                              ; preds = %bb2, %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp2 = load i8, i8* %arg  ; "load *a" inside the cycle
+  store i8 0, i8 *%arg  ; "*a = 0" again; absent from the expected output
+  br i1 undef, label %bb1, label %bb3
+
+bb3:                                              ; preds = %bb2
+  %tmp = load i8, i8* %arg  ; "load *a" after the cycle
+  %tmp3 = add i8 %tmp, %tmp2
+  ret i8 %tmp3  ; expected to become "ret i8 0"
+}
+;; This is an irreducible test case that will cause a phi node loop
+;; in the two blocks
+;;
+;; It should return 0, but it requires being
+;; completely optimistic about phis, otherwise
+;; we will not be able to see through the cycle.
+define i32 @irreducible_phi(i32 %arg) {
+; CHECK-LABEL: @irreducible_phi(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br i1 undef, label [[BB2:%.*]], label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i32 0
+;
+bb:
+  %tmp = add i32 0, %arg
+  br i1 undef, label %bb2, label %bb1  ; the cycle %bb1 <-> %bb2 can be entered at either block
+
+bb1:                                              ; preds = %bb2, %bb
+  %phi1 = phi i32 [%tmp, %bb], [%phi2, %bb2]  ; %tmp from entry, otherwise the other phi
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %phi2 = phi i32 [%tmp, %bb], [%phi1, %bb1]  ; %tmp from entry, otherwise the other phi
+  br i1 undef, label %bb1, label %bb3
+
+bb3:                                              ; preds = %bb2
+  ; This should be zero, since %phi2 can only ever carry %tmp
+  %tmp3 = sub i32 %tmp, %phi2
+  ret i32 %tmp3  ; expected to become "ret i32 0"
+}
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0, !0, !0}
+
+!0 = !{!"Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM 3.5svn)"}

Added: llvm/trunk/test/Transforms/NewGVN/basic-undef-test.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/basic-undef-test.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/basic-undef-test.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/basic-undef-test.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s
+; ModuleID = 'test3.ll'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @main(i32 *%foo)  {
+entry:
+; CHECK: load i32, i32* %foo, align 4
+  %0 = load i32, i32* %foo, align 4
+  store i32 5, i32* undef, align 4  ; store through an undef pointer, placed between the two loads of %foo
+; CHECK-NOT: load i32, i32* %foo, align 4
+  %1 = load i32, i32* %foo, align 4  ; second load of %foo; expected to be replaced by %0
+; CHECK: add i32 %0, %0
+  %2 = add i32 %0, %1
+  ret i32 %2
+}

Added: llvm/trunk/test/Transforms/NewGVN/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/basic.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -newgvn -S | FileCheck %s
+
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK-NEXT:  block1:
+; CHECK-NEXT:    br label [[BLOCK2:%.*]]
+; CHECK:       block2:
+; CHECK-NEXT:    ret i32 0
+;
+block1:
+  %z1 = bitcast i32 0 to i32  ; same-type cast of constant 0; never used
+  br label %block2
+block2:
+  %z2 = bitcast i32 0 to i32  ; same-type cast of constant 0
+  ret i32 %z2  ; expected to become "ret i32 0"
+}
+
+; Test that a select whose two arms are the same value is simplified to that value
+define i64 @simplifyselect(i64 %x, i64 %y, i1 %c1, i1 %c2, i1 %zzz) {
+; CHECK-LABEL: @simplifyselect(
+; CHECK-NEXT:    [[SHARED:%.*]] = add i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[C1:%.*]], i64 [[SHARED]], i64 [[X]]
+; CHECK-NEXT:    [[R2:%.*]] = select i1 [[C2:%.*]], i64 [[SHARED]], i64 [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = add i64 [[R1]], [[R2]]
+; CHECK-NEXT:    ret i64 [[R]]
+;
+  %shared = add i64 %x, %y
+  %r1 = select i1 %c1, i64 %shared, i64 %x
+  %r2 = select i1 %c2, i64 %shared, i64 %y
+  %tmp = select i1 %c2, i64 %x, i64 0  ; never used; absent from the expected output
+  %r2_eq2 = select i1 %zzz, i64 %r2, i64 %r2  ; both arms are %r2, so this is %r2 regardless of %zzz
+  %r = add i64 %r1, %r2_eq2
+  ret i64 %r
+}

Added: llvm/trunk/test/Transforms/NewGVN/big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/big-endian.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/big-endian.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/big-endian.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -newgvn -S < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"                                                                                         
+target triple = "powerpc64-unknown-linux-gnu"                                                                                     
+
+;; Make sure we use the correct bit shift, based on storage size, for
+;; loads that reuse a previously loaded value.
+define i64 @test1({ i1, i8 }* %predA, { i1, i8 }* %predB) {
+; CHECK-LABEL: @test1
+; CHECK-NOT: [[V1:%.*]] = load i16, i16* %{{.*}}
+; CHECK-NOT: [[V2:%.*]] = lshr i16 [[V1]], 8
+; CHECK-NOT: trunc i16 [[V2]] to i1
+
+  %valueLoadA.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 0
+  %valueLoadA.fca.0.load = load i1, i1* %valueLoadA.fca.0.gep, align 8  ; field 0 (i1) of *%predA
+  %valueLoadB.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 0
+  %valueLoadB.fca.0.load = load i1, i1* %valueLoadB.fca.0.gep, align 8  ; field 0 (i1) of *%predB
+  %isTrue = and i1 %valueLoadA.fca.0.load, %valueLoadB.fca.0.load
+  %valueLoadA.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 1
+  %valueLoadA.fca.1.load = load i8, i8* %valueLoadA.fca.1.gep, align 1  ; field 1 (i8) of *%predA
+  %isNotNullA = icmp ne i8 %valueLoadA.fca.1.load, 0
+  %valueLoadB.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 1
+  %valueLoadB.fca.1.load = load i8, i8* %valueLoadB.fca.1.gep, align 1  ; field 1 (i8) of *%predB
+  %isNotNullB = icmp ne i8 %valueLoadB.fca.1.load, 0
+  %isNotNull = and i1 %isNotNullA, %isNotNullB
+  %isTrueAndNotNull = and i1 %isTrue, %isNotNull
+  %ret = zext i1 %isTrueAndNotNull to i64
+  ret i64 %ret
+}
+
+;; And likewise for loads that reuse a stored value.
+define i1 @test2(i8 %V, i8* %P) {
+; CHECK-LABEL: @test2
+; CHECK-NOT: lshr
+  store i8 %V, i8* %P
+  %P2 = bitcast i8* %P to i1*  ; same address as the store above, viewed as i1*
+  %A = load i1, i1* %P2  ; i1 load from the location just written with the i8 %V
+  ret i1 %A
+}
+

Added: llvm/trunk/test/Transforms/NewGVN/bitcast-of-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/bitcast-of-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/bitcast-of-call.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/bitcast-of-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -newgvn -S | FileCheck %s
+; PR2213
+
+define i32* @f(i8* %x) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = call i8* @m(i32 12)
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[TMP]] to i32*
+; CHECK-NEXT:    ret i32* [[TMP1]]
+;
+entry:
+  %tmp = call i8* @m( i32 12 )            ; <i8*> [#uses=1]
+  %tmp1 = bitcast i8* %tmp to i32*                ; <i32*> [#uses=1]
+  %tmp3 = bitcast i32* %tmp1 to i8*  ; casts %tmp1 back to i8*
+  %tmp2 = bitcast i8* %tmp3 to i32*                ; <i32*> [#uses=1]
+  ret i32* %tmp2  ; expected output returns %tmp1 directly
+}
+
+declare i8* @m(i32)

Added: llvm/trunk/test/Transforms/NewGVN/br-identical.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/br-identical.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/br-identical.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/br-identical.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt -newgvn -S -o - %s | FileCheck %s
+; RUN: opt -passes=newgvn -S -o - %s | FileCheck %s
+
+; If a branch has two identical successors, we cannot declare either dead.
+
+define void @widget(i1 %p) {
+entry:
+  br label %bb2
+
+bb2:
+  %t1 = phi i64 [ 0, %entry ], [ %t5, %bb7 ]
+  %t2 = add i64 %t1, 1
+  %t3 = icmp ult i64 0, %t2
+  br i1 %t3, label %bb3, label %bb4
+
+bb3:
+  %t4 = call i64 @f()
+  br label %bb4
+
+bb4:
+  ; CHECK-NOT: phi {{.*}} undef
+  %foo = phi i64 [ %t4, %bb3 ], [ 0, %bb2 ]  ; never used; its incoming values must not be turned into undef
+  br i1 %p, label %bb5, label %bb6
+
+bb5:
+  br i1 true, label %bb7, label %bb7  ; both successors are %bb7
+
+bb6:
+  br i1 true, label %bb7, label %bb7  ; both successors are %bb7
+
+bb7:
+  %t5 = add i64 %t1, 1  ; same expression as %t2
+  br i1 %p, label %bb2, label %bb8
+
+bb8:
+  ret void
+}
+
+declare i64 @f()

Added: llvm/trunk/test/Transforms/NewGVN/calloc-load-removal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/calloc-load-removal.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/calloc-load-removal.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/calloc-load-removal.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt -S -basicaa -newgvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -newgvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS
+; Check that loads from calloc are recognized as being zero.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1() {
+  %1 = tail call noalias i8* @calloc(i64 1, i64 4)  ; calloc(1, 4): room for exactly one i32
+  %2 = bitcast i8* %1 to i32*
+  ; This load reads freshly calloc'ed memory, so it is trivially constant zero
+  %3 = load i32, i32* %2, align 4
+  ret i32 %3
+
+; CHECK-LABEL: @test1(
+; CHECK-NOT: %3 = load i32, i32* %2, align 4
+; CHECK: ret i32 0
+
+; CHECK_NO_LIBCALLS-LABEL: @test1(
+; CHECK_NO_LIBCALLS: load
+; CHECK_NO_LIBCALLS: ret i32 %
+
+}
+
+declare noalias i8* @calloc(i64, i64)

Added: llvm/trunk/test/Transforms/NewGVN/calls-nonlocal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/calls-nonlocal.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/calls-nonlocal.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/calls-nonlocal.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,78 @@
+; XFAIL: *
+;; NewGVN zaps the strlens, but currently takes two iterations to evaluate the conditions, because
+;; we prune predicateinfo, and the icmps only become equivalent after the strlens are zapped
+; Two occurrences of strlen should be zapped.
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9"
+
+define i32 @test(i32 %g, i8* %P) nounwind  {  ; three identical strlen(%P) == 100 tests, one per level of the branch chain
+entry:
+	%tmp2 = call i32 @strlen( i8* %P ) nounwind readonly 		; <i32> [#uses=1]
+	%tmp3 = icmp eq i32 %tmp2, 100		; <i1> [#uses=1]
+	%tmp34 = zext i1 %tmp3 to i8		; <i8> [#uses=1]
+	%toBool = icmp ne i8 %tmp34, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %bb, label %bb6
+
+bb:		; preds = %entry
+	br label %bb27
+
+bb6:		; preds = %entry
+	%tmp8 = add i32 %g, 42		; <i32> [#uses=2]
+	%tmp10 = call i32 @strlen( i8* %P ) nounwind readonly 		; <i32> [#uses=1]
+	%tmp11 = icmp eq i32 %tmp10, 100		; <i1> [#uses=1]
+	%tmp1112 = zext i1 %tmp11 to i8		; <i8> [#uses=1]
+	%toBool13 = icmp ne i8 %tmp1112, 0		; <i1> [#uses=1]
+	br i1 %toBool13, label %bb14, label %bb16
+
+bb14:		; preds = %bb6
+	br label %bb27
+
+bb16:		; preds = %bb6
+	%tmp18 = mul i32 %tmp8, 2		; <i32> [#uses=1]
+	%tmp20 = call i32 @strlen( i8* %P ) nounwind readonly 		; <i32> [#uses=1]
+	%tmp21 = icmp eq i32 %tmp20, 100		; <i1> [#uses=1]
+	%tmp2122 = zext i1 %tmp21 to i8		; <i8> [#uses=1]
+	%toBool23 = icmp ne i8 %tmp2122, 0		; <i1> [#uses=1]
+	br i1 %toBool23, label %bb24, label %bb26
+
+bb24:		; preds = %bb16
+	br label %bb27
+
+bb26:		; preds = %bb16
+	br label %bb27
+
+bb27:		; preds = %bb26, %bb24, %bb14, %bb
+	%tmp.0 = phi i32 [ 11, %bb26 ], [ %tmp18, %bb24 ], [ %tmp8, %bb14 ], [ %g, %bb ]		; <i32> [#uses=1]
+	br label %return
+
+return:		; preds = %bb27
+	ret i32 %tmp.0
+}
+
+; CHECK: define i32 @test(i32 %g, i8* %P) #0 {
+; CHECK: entry:
+; CHECK:   %tmp2 = call i32 @strlen(i8* %P) #1
+; CHECK:   %tmp3 = icmp eq i32 %tmp2, 100
+; CHECK:   %tmp34 = zext i1 %tmp3 to i8
+; CHECK:   br i1 %tmp3, label %bb, label %bb6
+; CHECK: bb:
+; CHECK:   br label %bb27
+; CHECK: bb6:
+; CHECK:   %tmp8 = add i32 %g, 42
+; CHECK:   br i1 false, label %bb14, label %bb16
+; CHECK: bb14:
+; CHECK:   br label %bb27
+; CHECK: bb16:
+; CHECK:   %tmp18 = mul i32 %tmp8, 2
+; CHECK:   br i1 false, label %bb24, label %bb26
+; CHECK: bb24:
+; CHECK:   br label %bb27
+; CHECK: bb26:
+; CHECK:   br label %bb27
+; CHECK: bb27:
+; CHECK:   %tmp.0 = phi i32 [ 11, %bb26 ], [ undef, %bb24 ], [ undef, %bb14 ], [ %g, %bb ]
+; CHECK:   ret i32 %tmp.0
+; CHECK: }
+
+declare i32 @strlen(i8*) nounwind readonly 

Added: llvm/trunk/test/Transforms/NewGVN/calls-readonly.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/calls-readonly.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/calls-readonly.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/calls-readonly.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+; Should delete the second call to strlen even though the intervening strchr call exists.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+define i8* @test(i8* %P, i8* %Q, i32 %x, i32 %y) nounwind readonly {  ; %4 repeats the strlen(%P) call %0, with a strchr on %Q in between
+entry:
+  %0 = tail call i32 @strlen(i8* %P)              ; <i32> [#uses=2]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  %2 = sdiv i32 %x, %y                            ; <i32> [#uses=1]
+  br label %bb1
+
+bb1:                                              ; preds = %bb, %entry
+  %x_addr.0 = phi i32 [ %2, %bb ], [ %x, %entry ] ; <i32> [#uses=1]
+  %3 = tail call i8* @strchr(i8* %Q, i32 97)      ; <i8*> [#uses=1]
+  %4 = tail call i32 @strlen(i8* %P)              ; <i32> [#uses=1]
+  %5 = add i32 %x_addr.0, %0                      ; <i32> [#uses=1]
+  %.sum = sub i32 %5, %4                          ; <i32> [#uses=1]
+  %6 = getelementptr i8, i8* %3, i32 %.sum            ; <i8*> [#uses=1]
+  ret i8* %6
+}
+
+; CHECK: define i8* @test(i8* %P, i8* %Q, i32 %x, i32 %y) #0 {
+; CHECK: entry:
+; CHECK-NEXT:   %0 = tail call i32 @strlen(i8* %P)
+; CHECK-NEXT:   %1 = icmp eq i32 %0, 0
+; CHECK-NEXT:   br i1 %1, label %bb, label %bb1
+; CHECK: bb:
+; CHECK-NEXT:   %2 = sdiv i32 %x, %y
+; CHECK-NEXT:   br label %bb1
+; CHECK: bb1:
+; CHECK-NEXT:   %x_addr.0 = phi i32 [ %2, %bb ], [ %x, %entry ]
+; CHECK-NEXT:   %3 = tail call i8* @strchr(i8* %Q, i32 97)
+; CHECK-NEXT:   %4 = add i32 %x_addr.0, %0
+; CHECK-NEXT:   %5 = getelementptr i8, i8* %3, i32 %x_addr.0
+; CHECK-NEXT:   ret i8* %5
+; CHECK: }
+
+declare i32 @strlen(i8*) nounwind readonly
+
+declare i8* @strchr(i8*, i32) nounwind readonly

Added: llvm/trunk/test/Transforms/NewGVN/commute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/commute.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/commute.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/commute.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -newgvn -S < %s | FileCheck %s
+
+declare void @use(i32, i32)
+
+define void @foo(i32 %x, i32 %y) {
+  ; CHECK-LABEL: @foo(
+  %add1 = add i32 %x, %y
+  %add2 = add i32 %y, %x  ; same add as %add1 with the operands swapped
+  call void @use(i32 %add1, i32 %add2)  ; expected output passes %add1 for both arguments
+  ; CHECK: @use(i32 %add1, i32 %add1)
+  ret void
+}
+
+declare void @vse(i1, i1)
+
+define void @bar(i32 %x, i32 %y) {
+  ; CHECK-LABEL: @bar(
+  %cmp1 = icmp ult i32 %x, %y
+  %cmp2 = icmp ugt i32 %y, %x  ; operands and predicate both swapped: same comparison as %cmp1
+  call void @vse(i1 %cmp1, i1 %cmp2)  ; expected output passes %cmp1 for both arguments
+  ; CHECK: @vse(i1 %cmp1, i1 %cmp1)
+  ret void
+}

Added: llvm/trunk/test/Transforms/NewGVN/completeness.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/completeness.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/completeness.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/completeness.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,605 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -enable-phi-of-ops=true -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test1(i32, i8**) {  ; multiply applied to a phi of two constants
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK:         br label [[TMP6:%.*]]
+; CHECK:         br label [[TMP6]]
+; CHECK:         [[PHIOFOPS:%.*]] = phi i32 [ 105, [[TMP5]] ], [ 75, [[TMP4]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ]
+; CHECK-NEXT:    ret i32 [[PHIOFOPS]]
+;
+  %3 = icmp ne i32 %0, 0
+  br i1 %3, label %4, label %5
+
+; <label>:4:                                      ; preds = %2
+  br label %6
+
+; <label>:5:                                      ; preds = %2
+  br label %6
+
+; <label>:6:                                      ; preds = %5, %4
+  %.0 = phi i32 [ 5, %4 ], [ 7, %5 ]
+  %7 = mul nsw i32 %.0, 15  ; 5*15 = 75 and 7*15 = 105: the constants of the expected PHIOFOPS phi
+  ret i32 %7
+}
+;; Dependent phi of ops: the second multiply consumes the result of the first.
+define i32 @test1b(i32, i8**) {
+; CHECK-LABEL: @test1b(
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK:         br label [[TMP6:%.*]]
+; CHECK:         br label [[TMP6]]
+; CHECK:         [[PHIOFOPS1:%.*]] = phi i32 [ 105, [[TMP5]] ], [ 75, [[TMP4]] ]
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i32 [ 1575, [[TMP5]] ], [ 1125, [[TMP4]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ]
+; CHECK-NEXT:    ret i32 [[PHIOFOPS]]
+;
+  %3 = icmp ne i32 %0, 0
+  br i1 %3, label %4, label %5
+
+; <label>:4:                                      ; preds = %2
+  br label %6
+
+; <label>:5:                                      ; preds = %2
+  br label %6
+
+; <label>:6:                                      ; preds = %5, %4
+  %.0 = phi i32 [ 5, %4 ], [ 7, %5 ]
+  %7 = mul nsw i32 %.0, 15  ; 75 or 105, as in the expected PHIOFOPS1 phi
+  %8 = mul nsw i32 %7, 15  ; 75*15 = 1125 or 105*15 = 1575, as in the expected PHIOFOPS phi
+  ret i32 %8
+}
+
+define i32 @test2(i32) {  ; add of two phis whose per-edge sums are equal
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
+; CHECK:         br label [[TMP5:%.*]]
+; CHECK:         br label [[TMP5]]
+; CHECK:         [[DOT01:%.*]] = phi i32 [ 3, [[TMP3]] ], [ 2, [[TMP4]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 2, [[TMP3]] ], [ 3, [[TMP4]] ]
+; CHECK-NEXT:    ret i32 5
+;
+  %2 = icmp ne i32 %0, 0
+  br i1 %2, label %3, label %4
+
+; <label>:3:                                      ; preds = %1
+  br label %5
+
+; <label>:4:                                      ; preds = %1
+  br label %5
+
+; <label>:5:                                      ; preds = %4, %3
+  %.01 = phi i32 [ 3, %3 ], [ 2, %4 ]
+  %.0 = phi i32 [ 2, %3 ], [ 3, %4 ]
+  %6 = add nsw i32 %.01, %.0  ; 3+2 via %3 and 2+3 via %4; the expected output returns the constant 5
+  ret i32 %6
+}
+define i32 @test3(i1 %which) {  ; constant minus a phi of constants
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 1000, [[ENTRY]] ], [ 10, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[PHIOFOPS]]
+;
+
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = sub i32 123, %A  ; 123-1000 = -877 from entry, 123-10 = 113 from delay
+  ret i32 %value
+}
+
+define <2 x i32> @test3vec(i1 %which) {  ; vector form of test3 with splat constants
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi <2 x i32> [ <i32 -877, i32 -877>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 1000>, [[ENTRY]] ], [ <i32 10, i32 10>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[PHIOFOPS]]
+;
+
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = sub <2 x i32> <i32 123, i32 123>, %A  ; per lane: 123-1000 = -877 from entry, 123-10 = 113 from delay
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test3vec2(i1 %which) {  ; vector form of test3 with a different constant in each lane
+; CHECK-LABEL: @test3vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi <2 x i32> [ <i32 -877, i32 -2167>, [[ENTRY:%.*]] ], [ <i32 113, i32 303>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 2500>, [[ENTRY]] ], [ <i32 10, i32 30>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[PHIOFOPS]]
+;
+
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = sub <2 x i32> <i32 123, i32 333>, %A  ; lane 0: 123-1000 / 123-10; lane 1: 333-2500 = -2167 / 333-30 = 303
+  ret <2 x i32> %value
+}
+
+;; This example is a bit contrived: because we can't create fake memory uses, we use two loads in the if blocks.
+define i32 @test4(i32, i8**, i32* noalias, i32* noalias) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    store i32 5, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    store i32 7, i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
+; CHECK:         br label [[TMP8:%.*]]
+; CHECK:         br label [[TMP8]]
+; CHECK:         [[DOT01:%.*]] = phi i32 [ 5, [[TMP6]] ], [ 7, [[TMP7]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32* [ [[TMP2]], [[TMP6]] ], [ [[TMP3]], [[TMP7]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP9]], 15
+; CHECK-NEXT:    [[TMP11:%.*]] = mul nsw i32 [[TMP10]], [[DOT01]]
+; CHECK-NEXT:    ret i32 [[TMP11]]
+;
+  store i32 5, i32* %2, align 4
+  store i32 7, i32* %3, align 4
+  %5 = icmp ne i32 %0, 0
+  br i1 %5, label %6, label %8
+
+; <label>:6:                                      ; preds = %4
+  %7 = load i32, i32* %2, align 4  ; reloads the 5 stored above; the expected output uses the constant instead
+  br label %10
+
+; <label>:8:                                      ; preds = %4
+  %9 = load i32, i32* %3, align 4  ; reloads the 7 stored above; the expected output uses the constant instead
+  br label %10
+
+; <label>:10:                                     ; preds = %8, %6
+  %.01 = phi i32 [ %7, %6 ], [ %9, %8 ]
+  %.0 = phi i32* [ %2, %6 ], [ %3, %8 ]
+  %11 = load i32, i32* %.0, align 4  ; load through the pointer phi; still present in the expected output
+  %12 = mul nsw i32 %11, 15
+  %13 = mul nsw i32 %12, %.01
+  ret i32 %13
+}
+
+@global = common global [100 x i64] zeroinitializer, align 16
+@global.1 = common global [100 x i64] zeroinitializer, align 16
+define i64 @test5(i64 %arg) {  ; nested loops; the inner loop multiplies two phis of loaded values
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB28:%.*]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB7:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br label [[BB5:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP9:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB27:%.*]], label [[BB7]]
+; CHECK:       bb7:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i64 [ [[ARG]], [[BB2]] ], [ [[TMP9]], [[BB5]] ]
+; CHECK-NEXT:    [[TMP9]] = add nsw i64 [[TMP8]], -1
+; CHECK-NEXT:    [[TMP10:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16
+; CHECK-NEXT:    [[TMP11:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16
+; CHECK-NEXT:    [[TMP12:%.*]] = mul nsw i64 [[TMP11]], [[TMP10]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[TMP12]], 0
+; CHECK-NEXT:    br i1 [[TMP13]], label [[BB5]], label [[BB14:%.*]]
+; CHECK:       bb14:
+; CHECK-NEXT:    br label [[BB15:%.*]]
+; CHECK:       bb15:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i64 [ [[TMP12]], [[BB14]] ], [ [[TMP25:%.*]], [[BB15]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi i64 [ [[TMP24:%.*]], [[BB15]] ], [ [[TMP11]], [[BB14]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i64 [ [[TMP22:%.*]], [[BB15]] ], [ [[TMP10]], [[BB14]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = phi i64 [ [[TMP20:%.*]], [[BB15]] ], [ 0, [[BB14]] ]
+; CHECK-NEXT:    store i64 [[PHIOFOPS]], i64* [[TMP]], align 8
+; CHECK-NEXT:    [[TMP20]] = add nuw nsw i64 [[TMP18]], 1
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 [[TMP20]]
+; CHECK-NEXT:    [[TMP22]] = load i64, i64* [[TMP21]], align 8
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 [[TMP20]]
+; CHECK-NEXT:    [[TMP24]] = load i64, i64* [[TMP23]], align 8
+; CHECK-NEXT:    [[TMP25]] = mul nsw i64 [[TMP24]], [[TMP22]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[TMP20]], [[TMP25]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[BB4:%.*]], label [[BB15]]
+; CHECK:       bb27:
+; CHECK-NEXT:    br label [[BB28]]
+; CHECK:       bb28:
+; CHECK-NEXT:    ret i64 0
+;
+bb:
+  %tmp = alloca i64, align 8
+  %tmp1 = icmp eq i64 %arg, 0
+  br i1 %tmp1, label %bb28, label %bb2
+
+bb2:                                              ; preds = %bb
+  %tmp3 = bitcast i64* %tmp to i8*  ; has no uses; absent from the expected output
+  br label %bb7
+
+bb4:                                              ; preds = %bb15
+  br label %bb5
+
+bb5:                                              ; preds = %bb7, %bb4
+  %tmp6 = icmp eq i64 %tmp9, 0
+  br i1 %tmp6, label %bb27, label %bb7
+
+bb7:                                              ; preds = %bb5, %bb2
+  %tmp8 = phi i64 [ %arg, %bb2 ], [ %tmp9, %bb5 ]
+  %tmp9 = add nsw i64 %tmp8, -1  ; outer-loop counter, counts down from %arg
+  %tmp10 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16
+  %tmp11 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16
+  %tmp12 = mul nsw i64 %tmp11, %tmp10
+  %tmp13 = icmp eq i64 %tmp12, 0
+  br i1 %tmp13, label %bb5, label %bb14
+
+bb14:                                             ; preds = %bb7
+  br label %bb15
+
+bb15:                                             ; preds = %bb15, %bb14
+  %tmp16 = phi i64 [ %tmp24, %bb15 ], [ %tmp11, %bb14 ]
+  %tmp17 = phi i64 [ %tmp22, %bb15 ], [ %tmp10, %bb14 ]
+  %tmp18 = phi i64 [ %tmp20, %bb15 ], [ 0, %bb14 ]
+;; This multiply is an op of phis which is really equivalent to phi(tmp25, tmp12)
+  %tmp19 = mul nsw i64 %tmp16, %tmp17  ; from bb14: tmp11*tmp10 (= tmp12); from bb15: tmp24*tmp22 (= tmp25)
+  store i64 %tmp19, i64* %tmp, align 8  ; the expected output stores the PHIOFOPS phi instead
+  %tmp20 = add nuw nsw i64 %tmp18, 1  ; inner-loop index, used to address both globals
+  %tmp21 = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 %tmp20
+  %tmp22 = load i64, i64* %tmp21, align 8
+  %tmp23 = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 %tmp20
+  %tmp24 = load i64, i64* %tmp23, align 8
+  %tmp25 = mul nsw i64 %tmp24, %tmp22
+  %tmp26 = icmp eq i64 %tmp20, %tmp25
+  br i1 %tmp26, label %bb4, label %bb15
+
+bb27:                                             ; preds = %bb5
+  br label %bb28
+
+bb28:                                             ; preds = %bb27, %bb
+  ret i64 0
+}
+
+;; These icmps compare a phi of constants against constants, so each is equivalent to a phi of constants.
+define i8 @test6(i8* %addr) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry-block:
+; CHECK-NEXT:    br label %main-loop
+; CHECK:       main-loop:
+; CHECK-NEXT:    [[PHIOFOPS1:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ]
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ]
+; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ 0, %entry-block ], [ 1, [[CORE]] ]
+; CHECK-NEXT:    store volatile i8 0, i8* [[ADDR:%.*]]
+; CHECK-NEXT:    br i1 [[PHIOFOPS1]], label %busy-wait-phi-0, label [[EXIT:%.*]]
+; CHECK:       busy-wait-phi-0:
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, i8* [[ADDR]]
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0
+; CHECK-NEXT:    br i1 [[ICMP]], label %busy-wait-phi-0, label [[CORE]]
+; CHECK:       core:
+; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[TRAP:%.*]], label %main-loop
+; CHECK:       trap:
+; CHECK-NEXT:    ret i8 1
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 0
+;
+entry-block:
+  br label %main-loop
+
+main-loop:
+  %phi = phi i8 [ 0, %entry-block ], [ 1, %core ]
+  %switch_0 = icmp eq i8 %phi, 0  ; true when entered from entry-block (phi = 0), false from core (phi = 1)
+  store volatile i8 0, i8* %addr
+  br i1 %switch_0, label %busy-wait-phi-0, label %exit
+
+busy-wait-phi-0:
+  %load = load volatile i8, i8* %addr  ; volatile load; kept as-is in the expected output
+  %icmp = icmp eq i8 %load, 0
+  br i1 %icmp, label %busy-wait-phi-0, label %core
+
+core:
+  %switch_1 = icmp eq i8 %phi, 1  ; false when %phi came from entry-block, true when it came from core
+  br i1 %switch_1, label %trap, label %main-loop
+
+trap:
+  ret i8 1
+
+exit:
+  ret i8 0
+}
+
+; Test that we don't loop forever while simplifying
+; an undefined value that can go both ways.
+define void @test7() {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp = phi i32 [ undef, %bb ], [ %tmp3, %bb1 ]  ; undef on entry, then fed by the select below
+  %tmp2 = icmp eq i32 %tmp, 0
+  %tmp3 = select i1 %tmp2, i32 1, i32 %tmp  ; cycles back into %tmp; none of these three appear in the expected output
+  br label %bb1
+}
+
+
+
+; Test that we get a consistent answer about what the
+; value of this undefined select is.
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  %tmp = select i1 undef, i8 0, i8 1  ; select on an undef condition: may yield 0 or 1
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp2 = phi i8 [ %tmp4, %bb1 ], [ %tmp, %bb ]
+  %tmp3 = icmp eq i8 %tmp2, 0
+  %tmp4 = select i1 %tmp3, i8 1, i8 %tmp2  ; cycles back into %tmp2; the expected output keeps only the branches
+  br label %bb1
+}
+
+
+;; Make sure we handle the case where we later come up with an expression that we need
+;; for a phi of ops.
+define void @test9() {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB6:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i32 [ -13, [[BB2]] ], [ [[TMP11:%.*]], [[BB6]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ 1, [[BB2]] ], [ [[TMP8:%.*]], [[BB6]] ]
+; CHECK-NEXT:    [[TMP8]] = add nuw nsw i32 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP11]] = add i32 -14, [[TMP8]]
+; CHECK-NEXT:    br label [[BB6]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  br i1 undef, label %bb1, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %tmp = select i1 true, i32 -14, i32 -10  ; condition is the constant true, so this is -14
+  %tmp3 = add i32 %tmp, 0  ; -14
+  %tmp4 = select i1 true, i32 -14, i32 -10  ; same expression as %tmp
+  %tmp5 = add i32 %tmp4, 0  ; -14
+  br label %bb6
+
+bb6:                                              ; preds = %bb6, %bb2
+  %tmp7 = phi i32 [ 1, %bb2 ], [ %tmp13, %bb6 ]
+  %tmp8 = add nuw nsw i32 %tmp7, 1
+  %tmp9 = add i32 %tmp3, %tmp7  ; -14 + %tmp7: -13 on entry from bb2, the previous iteration's %tmp11 afterwards
+  %tmp10 = select i1 false, i32 undef, i32 %tmp9  ; condition is the constant false, so this is %tmp9
+  %tmp11 = add i32 %tmp5, %tmp8  ; appears as "add i32 -14, TMP8" in the expected output
+  %tmp12 = select i1 undef, i32 undef, i32 %tmp11
+  %tmp13 = add nuw nsw i32 %tmp7, 1  ; same expression as %tmp8; the expected phi uses TMP8 instead
+  br label %bb6
+}
+
+;; Ensure that we revisit predicateinfo operands at the right points in time.
+define void @test10() {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:  b:
+; CHECK-NEXT:    br label [[G:%.*]]
+; CHECK:       g:
+; CHECK-NEXT:    [[N:%.*]] = phi i32* [ [[H:%.*]], [[I:%.*]] ], [ null, [[B:%.*]] ]
+; CHECK-NEXT:    [[H]] = getelementptr i32, i32* [[N]], i64 1
+; CHECK-NEXT:    [[J:%.*]] = icmp eq i32* [[H]], inttoptr (i64 32 to i32*)
+; CHECK-NEXT:    br i1 [[J]], label [[C:%.*]], label [[I]]
+; CHECK:       i:
+; CHECK-NEXT:    br i1 undef, label [[K:%.*]], label [[G]]
+; CHECK:       k:
+; CHECK-NEXT:    br i1 false, label [[C]], label [[O:%.*]]
+; CHECK:       o:
+; CHECK-NEXT:    br label [[C]]
+; CHECK:       c:
+; CHECK-NEXT:    ret void
+;
+b:
+  %m = getelementptr i32, i32* null, i64 8  ; appears as the constant inttoptr (i64 32 to i32*) in the expected output
+  br label %g
+
+g:                                                ; preds = %i, %b
+  %n = phi i32* [ %h, %i ], [ null, %b ]
+  %h = getelementptr i32, i32* %n, i64 1
+  %j = icmp eq i32* %h, %m
+  br i1 %j, label %c, label %i
+
+i:                                                ; preds = %g
+  br i1 undef, label %k, label %g
+
+k:                                                ; preds = %i
+  %l = icmp eq i32* %n, %m  ; the expected output branches on the constant false here
+  br i1 %l, label %c, label %o
+
+o:                                                ; preds = %k
+  br label %c
+
+c:                                                ; preds = %o, %k, %g
+  %0 = phi i32* [ undef, %o ], [ %m, %k ], [ %m, %g ]  ; has no uses; absent from the expected output
+  ret void
+}
+
+;; Ensure we handle VariableExpression properly.
+define void @test11() {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP:%.*]] = phi i1 [ false, [[BB1]] ], [ true, [[BB:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32* @wombat()
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32* [[TMP3]], null
+; CHECK-NEXT:    [[TMP5:%.*]] = and i1 [[TMP]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB7:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    unreachable
+; CHECK:       bb7:
+; CHECK-NEXT:    ret void
+;
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:                                              ; preds = %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp = phi i1 [ false, %bb1 ], [ true, %bb ]
+  %tmp3 = call i32* @wombat()  ; result of a call to an external declaration
+  %tmp4 = icmp ne i32* %tmp3, null
+  %tmp5 = and i1 %tmp, %tmp4  ; phi combined with a call-dependent value; unchanged in the expected output
+  br i1 %tmp5, label %bb6, label %bb7
+
+bb6:                                              ; preds = %bb2
+  unreachable
+
+bb7:                                              ; preds = %bb2
+  ret void
+}
+
+declare i32* @wombat()
+
+;; Ensure that when reachability affects a phi of ops, we recompute
+;; it.  Here, the phi node is marked for recomputation when bb7->bb3
+;; becomes live, but the value does not change. If we do not directly
+;; recompute the phi of ops instruction (tmp5), the value number will
+;; change in the verifier, as it goes from a constant value to a
+;; phi of [true, false].
+
+define void @test12() {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* null
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB2:%.*]], label [[BB8:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB2]] ], [ false, [[BB7:%.*]] ]
+; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[BB6:%.*]], label [[BB7]]
+; CHECK:       bb6:
+; CHECK-NEXT:    br label [[BB7]]
+; CHECK:       bb7:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb8:
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = load i32, i32* null
+  %tmp1 = icmp sgt i32 %tmp, 0
+  br i1 %tmp1, label %bb2, label %bb8
+
+bb2:                                              ; preds = %bb
+  br label %bb3
+
+bb3:                                              ; preds = %bb7, %bb2
+  %tmp4 = phi i32 [ %tmp, %bb2 ], [ undef, %bb7 ]  ; %tmp via bb2 (reached only when %tmp1 is true), undef via bb7
+  %tmp5 = icmp sgt i32 %tmp4, 0  ; same compare as %tmp1; expected to become phi [ true, bb2 ], [ false, bb7 ]
+  br i1 %tmp5, label %bb6, label %bb7
+
+bb6:                                              ; preds = %bb3
+  br label %bb7
+
+bb7:                                              ; preds = %bb6, %bb3
+  br label %bb3
+
+bb8:                                              ; preds = %bb
+  ret void
+}
+
+;; Make sure we reprocess phi of ops involving loads when loads change class.
+;; This is PR 34473.
+define void @test13() {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP:%.*]] = load i8, i8* null
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i8 [ [[TMP]], [[BB1]] ], [ [[TMP10:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i8* [ null, [[BB1]] ], [ [[TMP6:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i32 [ undef, [[BB1]] ], [ [[TMP9:%.*]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP6]] = getelementptr i8, i8* [[TMP4]], i64 1
+; CHECK-NEXT:    [[TMP8:%.*]] = sext i8 [[PHIOFOPS]] to i32
+; CHECK-NEXT:    [[TMP9]] = mul i32 [[TMP5]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10]] = load i8, i8* [[TMP6]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
+; CHECK-NEXT:    br i1 [[TMP11]], label [[BB12:%.*]], label [[BB3]]
+; CHECK:       bb12:
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP9]], 0
+; CHECK-NEXT:    br i1 [[TMP14]], label [[BB1]], label [[BB15:%.*]]
+; CHECK:       bb15:
+; CHECK-NEXT:    call void (...) @bar()
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb15, %bb12, %bb
+  %tmp = load i8, i8* null
+  %tmp2 = icmp eq i8 %tmp, 8  ; has no uses; absent from the expected output
+  br label %bb3
+
+bb3:                                              ; preds = %bb3, %bb1
+  %tmp4 = phi i8* [ null, %bb1 ], [ %tmp6, %bb3 ]
+  %tmp5 = phi i32 [ undef, %bb1 ], [ %tmp9, %bb3 ]
+  %tmp6 = getelementptr i8, i8* %tmp4, i64 1
+  %tmp7 = load i8, i8* %tmp4  ; load through the pointer phi; expected to become phi [ %tmp, bb1 ], [ %tmp10, bb3 ]
+  %tmp8 = sext i8 %tmp7 to i32
+  %tmp9 = mul i32 %tmp5, %tmp8
+  %tmp10 = load i8, i8* %tmp6  ; loads from the address %tmp4 takes on the next iteration
+  %tmp11 = icmp eq i8 %tmp10, 0
+  br i1 %tmp11, label %bb12, label %bb3
+
+bb12:                                             ; preds = %bb3
+  %tmp13 = phi i32 [ %tmp9, %bb3 ]  ; single-entry phi; the expected output uses %tmp9 directly
+  %tmp14 = icmp eq i32 %tmp13, 0
+  br i1 %tmp14, label %bb1, label %bb15
+
+bb15:                                             ; preds = %bb12
+  call void (...) @bar()
+  br label %bb1
+}
+
+declare void @bar(...)
+

Added: llvm/trunk/test/Transforms/NewGVN/cond_br.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/cond_br.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/cond_br.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/cond_br.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s
+@y = external global i32
+@z = external global i32
+
+; Function Attrs: nounwind ssp uwtable
+define void @foo(i32 %x) {  ; branch on a compare of (x - x); the entry block is unnamed (implicitly %0)
+; CHECK: @foo(i32 %x)
+; CHECK: %.pre = load i32, i32* @y
+; CHECK: call void @bar(i32 %.pre)
+
+  %t = sub i32 %x, %x  ; x - x
+  %.pre = load i32, i32* @y, align 4
+  %cmp = icmp sgt i32 %t, 2  ; (x - x) > 2
+  br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+entry.if.end_crit_edge:                           ; preds = %0
+  br label %if.end
+
+if.then:                                          ; preds = %0
+  %add = add nsw i32 %x, 3
+  store i32 %add, i32* @y, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry.if.end_crit_edge, %if.then
+  %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add, %if.then ]
+  tail call void @bar(i32 %1)  ; the expected output passes %.pre directly
+  ret void
+}
+
+define void @foo2(i32 %x) {  ; like @foo, but the false edge goes through a block that stores to @z
+; CHECK: @foo2(i32 %x)
+; CHECK: %.pre = load i32, i32* @y
+; CHECK: tail call void @bar(i32 %.pre)
+entry:
+  %t = sub i32 %x, %x  ; x - x
+  %.pre = load i32, i32* @y, align 4
+  %cmp = icmp sgt i32 %t, 2  ; (x - x) > 2
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %add = add nsw i32 %x, 3
+  store i32 %add, i32* @y, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 1, i32* @z, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %0 = phi i32 [ %.pre, %if.else ], [ %add, %if.then ]
+  tail call void @bar(i32 %0)  ; the expected output passes %.pre directly
+  ret void
+}
+
+declare void @bar(i32)

Added: llvm/trunk/test/Transforms/NewGVN/cond_br2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/cond_br2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/cond_br2.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/cond_br2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,141 @@
+; XFAIL: *
+; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", [1 x %"union.llvm::SmallVectorBase::U"] }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" }
+%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" }
+%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase" }
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8*, %"union.llvm::SmallVectorBase::U" }
+%"union.llvm::SmallVectorBase::U" = type { x86_fp80 }
+
+; Function Attrs: ssp uwtable
+define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK: @_Z4testv()
+; CHECK: invoke.cont:
+; CHECK: br i1 true, label %new.notnull.i11, label %if.end.i14
+; CHECK: Retry.i10:
+
+entry:
+  %sv = alloca %"class.llvm::SmallVector", align 16
+  %0 = bitcast %"class.llvm::SmallVector"* %sv to i8*
+  call void @llvm.lifetime.start.p0i8(i64 64, i8* %0) #1
+  %BeginX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0
+  %FirstEl.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 3
+  %1 = bitcast %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i to i8*
+  store i8* %1, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %EndX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1
+  store i8* %1, i8** %EndX.i.i.i.i.i.i, align 8, !tbaa !4
+  %CapacityX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2
+  %add.ptr.i.i.i.i2.i.i = getelementptr inbounds %"union.llvm::SmallVectorBase::U", %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i, i64 2
+  %add.ptr.i.i.i.i.i.i = bitcast %"union.llvm::SmallVectorBase::U"* %add.ptr.i.i.i.i2.i.i to i8*
+  store i8* %add.ptr.i.i.i.i.i.i, i8** %CapacityX.i.i.i.i.i.i, align 16, !tbaa !4
+  %EndX.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1
+  %2 = load i8*, i8** %EndX.i, align 8, !tbaa !4  ; same address as %EndX.i.i.i.i.i.i, which was just stored with %1
+  %CapacityX.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2
+  %cmp.i = icmp ult i8* %2, %add.ptr.i.i.i.i.i.i  ; end pointer compared against the capacity pointer stored above
+  br i1 %cmp.i, label %Retry.i, label %if.end.i
+
+Retry.i:                                          ; preds = %.noexc, %entry
+  %3 = phi i8* [ %2, %entry ], [ %.pre.i, %.noexc ]
+  %new.isnull.i = icmp eq i8* %3, null
+  br i1 %new.isnull.i, label %invoke.cont, label %new.notnull.i
+
+new.notnull.i:                                    ; preds = %Retry.i
+  %4 = bitcast i8* %3 to i32*
+  store i32 1, i32* %4, align 4, !tbaa !5
+  br label %invoke.cont
+
+if.end.i:                                         ; preds = %entry
+  %5 = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0
+  invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %5, i64 0, i64 4)
+          to label %.noexc unwind label %lpad
+
+.noexc:                                           ; preds = %if.end.i
+  %.pre.i = load i8*, i8** %EndX.i, align 8, !tbaa !4
+  br label %Retry.i
+
+invoke.cont:                                      ; preds = %new.notnull.i, %Retry.i
+  %add.ptr.i = getelementptr inbounds i8, i8* %3, i64 4
+  store i8* %add.ptr.i, i8** %EndX.i, align 8, !tbaa !4
+  %6 = load i8*, i8** %CapacityX.i, align 16, !tbaa !4
+  %cmp.i8 = icmp ult i8* %add.ptr.i, %6  ; the checks at the top expect this condition to fold to true
+  br i1 %cmp.i8, label %new.notnull.i11, label %if.end.i14
+
+Retry.i10:                                        ; preds = %if.end.i14
+  %.pre.i13 = load i8*, i8** %EndX.i, align 8, !tbaa !4
+  %new.isnull.i9 = icmp eq i8* %.pre.i13, null
+  br i1 %new.isnull.i9, label %invoke.cont2, label %new.notnull.i11
+
+new.notnull.i11:                                  ; preds = %invoke.cont, %Retry.i10
+  %7 = phi i8* [ %.pre.i13, %Retry.i10 ], [ %add.ptr.i, %invoke.cont ]
+  %8 = bitcast i8* %7 to i32*
+  store i32 2, i32* %8, align 4, !tbaa !5
+  br label %invoke.cont2
+
+if.end.i14:                                       ; preds = %invoke.cont
+  %9 = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0
+  invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %9, i64 0, i64 4)
+          to label %Retry.i10 unwind label %lpad
+
+invoke.cont2:                                     ; preds = %new.notnull.i11, %Retry.i10
+  %10 = phi i8* [ null, %Retry.i10 ], [ %7, %new.notnull.i11 ]
+  %add.ptr.i12 = getelementptr inbounds i8, i8* %10, i64 4
+  store i8* %add.ptr.i12, i8** %EndX.i, align 8, !tbaa !4
+  invoke void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"* %sv)
+          to label %invoke.cont3 unwind label %lpad
+
+invoke.cont3:                                     ; preds = %invoke.cont2
+  %11 = load i8*, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %cmp.i.i.i.i19 = icmp eq i8* %11, %1  ; is the begin pointer still the in-object buffer address %1?
+  br i1 %cmp.i.i.i.i19, label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21, label %if.then.i.i.i20
+
+if.then.i.i.i20:                                  ; preds = %invoke.cont3
+  call void @free(i8* %11) #1  ; reached only when the begin pointer differs from %1
+  br label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21
+
+_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21:          ; preds = %invoke.cont3, %if.then.i.i.i20
+  call void @llvm.lifetime.end.p0i8(i64 64, i8* %0) #1
+  ret void
+
+lpad:                                             ; preds = %if.end.i14, %if.end.i, %invoke.cont2
+  %12 = landingpad { i8*, i32 }
+          cleanup
+  %13 = load i8*, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %cmp.i.i.i.i = icmp eq i8* %13, %1  ; same begin-pointer test as in invoke.cont3, on the unwind path
+  br i1 %cmp.i.i.i.i, label %eh.resume, label %if.then.i.i.i
+
+if.then.i.i.i:                                    ; preds = %lpad
+  call void @free(i8* %13) #1
+  br label %eh.resume
+
+eh.resume:                                        ; preds = %if.then.i.i.i, %lpad
+  resume { i8*, i32 } %12
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+declare void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"*, i64, i64) #2
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture) #3
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = !{!"any pointer", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"int", !1}
+!4 = !{!0, !0, i64 0}
+!5 = !{!3, !3, i64 0}

Added: llvm/trunk/test/Transforms/NewGVN/condprop-xfail.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/condprop-xfail.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/condprop-xfail.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/condprop-xfail.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,123 @@
+; XFAIL: *
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+@a = external global i32		; <i32*> [#uses=7]
+
+;; NewGVN takes two passes to get this, because we prune predicateinfo
+; CHECK-LABEL: @test1(
+define i32 @test1() nounwind {
+entry:
+	%0 = load i32, i32* @a, align 4
+	%1 = icmp eq i32 %0, 4
+	br i1 %1, label %bb, label %bb1
+
+bb:		; preds = %entry
+	br label %bb8
+
+bb1:		; preds = %entry
+	%2 = load i32, i32* @a, align 4
+	%3 = icmp eq i32 %2, 5
+	br i1 %3, label %bb2, label %bb3
+
+bb2:		; preds = %bb1
+	br label %bb8
+
+bb3:		; preds = %bb1
+	%4 = load i32, i32* @a, align 4
+	%5 = icmp eq i32 %4, 4
+; CHECK: br i1 false, label %bb4, label %bb5
+	br i1 %5, label %bb4, label %bb5
+
+bb4:		; preds = %bb3
+	%6 = load i32, i32* @a, align 4
+	%7 = add i32 %6, 5
+	br label %bb8
+
+bb5:		; preds = %bb3
+	%8 = load i32, i32* @a, align 4
+	%9 = icmp eq i32 %8, 5
+; CHECK: br i1 false, label %bb6, label %bb7
+	br i1 %9, label %bb6, label %bb7
+
+bb6:		; preds = %bb5
+	%10 = load i32, i32* @a, align 4
+	%11 = add i32 %10, 4
+	br label %bb8
+
+bb7:		; preds = %bb5
+	%12 = load i32, i32* @a, align 4
+	br label %bb8
+
+bb8:		; preds = %bb7, %bb6, %bb4, %bb2, %bb
+	%.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ]
+	br label %return
+
+return:		; preds = %bb8
+	ret i32 %.0
+}
+;; NewGVN takes two passes to get test[6,8] and test[6,8]_fp's main part
+;; The icmp ne requires an equality table that inserts the inequalities for each
+;; discovered equality while processing.
+; CHECK-LABEL: @test6(
+define i1 @test6(i32 %x, i32 %y) {
+  %cmp2 = icmp ne i32 %x, %y
+  %cmp = icmp eq i32 %x, %y
+  %cmp3 = icmp eq i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK-LABEL: @test6_fp(
+define i1 @test6_fp(float %x, float %y) {
+  %cmp2 = fcmp une float %x, %y
+  %cmp = fcmp oeq float %x, %y
+  %cmp3 = fcmp oeq float  %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+; CHECK-LABEL: @test8(
+define i1 @test8(i32 %x, i32 %y) {
+  %cmp2 = icmp sle i32 %x, %y
+  %cmp = icmp sgt i32 %x, %y
+  %cmp3 = icmp sgt i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
+; CHECK-LABEL: @test8_fp(
+define i1 @test8_fp(float %x, float %y) {
+  %cmp2 = fcmp ule float %x, %y
+  %cmp = fcmp ogt float %x, %y
+  %cmp3 = fcmp ogt float %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+

Added: llvm/trunk/test/Transforms/NewGVN/condprop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/condprop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/condprop.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/condprop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+
+
+declare void @foo(i1)
+declare void @bar(i32)
+
+define void @test3(i32 %x, i32 %y) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
+; CHECK-NEXT:    [[Z:%.*]] = and i1 [[XZ]], [[YZ]]
+; CHECK-NEXT:    br i1 [[Z]], label [[BOTH_ZERO:%.*]], label [[NOPE:%.*]]
+; CHECK:       both_zero:
+; CHECK-NEXT:    call void @foo(i1 true)
+; CHECK-NEXT:    call void @foo(i1 true)
+; CHECK-NEXT:    call void @bar(i32 0)
+; CHECK-NEXT:    call void @bar(i32 0)
+; CHECK-NEXT:    ret void
+; CHECK:       nope:
+; CHECK-NEXT:    call void @foo(i1 false)
+; CHECK-NEXT:    ret void
+;
+  %xz = icmp eq i32 %x, 0
+  %yz = icmp eq i32 %y, 0
+  %z = and i1 %xz, %yz
+  br i1 %z, label %both_zero, label %nope
+both_zero:
+  call void @foo(i1 %xz)
+  call void @foo(i1 %yz)
+  call void @bar(i32 %x)
+  call void @bar(i32 %y)
+  ret void
+nope:
+  call void @foo(i1 %z)
+  ret void
+}
+define void @test4(i1 %b, i32 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    br i1 [[B:%.*]], label [[SW:%.*]], label [[CASE3:%.*]]
+; CHECK:       sw:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[CASE0:%.*]]
+; CHECK-NEXT:    i32 1, label [[CASE1:%.*]]
+; CHECK-NEXT:    i32 2, label [[CASE0]]
+; CHECK-NEXT:    i32 3, label [[CASE3]]
+; CHECK-NEXT:    i32 4, label [[DEFAULT]]
+; CHECK-NEXT:    ]
+; CHECK:       default:
+; CHECK-NEXT:    call void @bar(i32 [[X]])
+; CHECK-NEXT:    ret void
+; CHECK:       case0:
+; CHECK-NEXT:    call void @bar(i32 [[X]])
+; CHECK-NEXT:    ret void
+; CHECK:       case1:
+; CHECK-NEXT:    call void @bar(i32 1)
+; CHECK-NEXT:    ret void
+; CHECK:       case3:
+; CHECK-NEXT:    call void @bar(i32 [[X]])
+; CHECK-NEXT:    ret void
+;
+  br i1 %b, label %sw, label %case3
+sw:
+  switch i32 %x, label %default [
+  i32 0, label %case0
+  i32 1, label %case1
+  i32 2, label %case0
+  i32 3, label %case3
+  i32 4, label %default
+  ]
+default:
+  call void @bar(i32 %x)
+  ret void
+case0:
+  call void @bar(i32 %x)
+  ret void
+case1:
+  call void @bar(i32 %x)
+  ret void
+case3:
+  call void @bar(i32 %x)
+  ret void
+}
+
+define i1 @test5(i32 %x, i32 %y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK:       same:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       different:
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = icmp eq i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = icmp ne i32 %x, %y
+  ret i1 %cmp2
+
+different:
+  %cmp3 = icmp eq i32 %x, %y
+  ret i1 %cmp3
+}
+
+
+define i1 @test7(i32 %x, i32 %y) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK:       same:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       different:
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = icmp sgt i32 %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = icmp sle i32 %x, %y
+  ret i1 %cmp2
+
+different:
+  %cmp3 = icmp sgt i32 %x, %y
+  ret i1 %cmp3
+}
+
+define i1 @test7_fp(float %x, float %y) {
+; CHECK-LABEL: @test7_fp(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK:       same:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       different:
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ogt float %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = fcmp ule float %x, %y
+  ret i1 %cmp2
+
+different:
+  %cmp3 = fcmp ogt float %x, %y
+  ret i1 %cmp3
+}
+
+; PR1768
+define i32 @test9(i32 %i, i32 %j) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]]
+; CHECK:       cond_true:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       ret:
+; CHECK-NEXT:    ret i32 5
+;
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+  %diff = sub i32 %i, %j
+  ret i32 %diff
+
+ret:
+  ret i32 5
+}
+
+; PR1768
+define i32 @test10(i32 %j, i32 %i) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]]
+; CHECK:       cond_true:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       ret:
+; CHECK-NEXT:    ret i32 5
+;
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+  %diff = sub i32 %i, %j
+  ret i32 %diff
+
+ret:
+  ret i32 5
+}
+
+declare i32 @yogibar()
+
+define i32 @test11(i32 %x) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[V0:%.*]] = call i32 @yogibar()
+; CHECK-NEXT:    [[V1:%.*]] = call i32 @yogibar()
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V0]], [[V1]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[NEXT:%.*]]
+; CHECK:       cond_true:
+; CHECK-NEXT:    ret i32 [[V0]]
+; CHECK:       next:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[X:%.*]], [[V0]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE2:%.*]], label [[NEXT2:%.*]]
+; CHECK:       cond_true2:
+; CHECK-NEXT:    ret i32 [[X]]
+; CHECK:       next2:
+; CHECK-NEXT:    ret i32 0
+;
+  %v0 = call i32 @yogibar()
+  %v1 = call i32 @yogibar()
+  %cmp = icmp eq i32 %v0, %v1
+  br i1 %cmp, label %cond_true, label %next
+
+cond_true:
+  ret i32 %v1
+
+next:
+  %cmp2 = icmp eq i32 %x, %v0
+  br i1 %cmp2, label %cond_true2, label %next2
+
+cond_true2:
+  ret i32 %v0
+
+next2:
+  ret i32 0
+}
+
+define i32 @test12(i32 %x) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond_true:
+; CHECK-NEXT:    br label [[RET:%.*]]
+; CHECK:       cond_false:
+; CHECK-NEXT:    br label [[RET]]
+; CHECK:       ret:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[X]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:
+  br label %ret
+
+cond_false:
+  br label %ret
+
+ret:
+  %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ]
+  ret i32 %res
+}

Added: llvm/trunk/test/Transforms/NewGVN/crash-no-aa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/crash-no-aa.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/crash-no-aa.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/crash-no-aa.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt -disable-basicaa -newgvn -S < %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-freebsd8.0"
+
+; PR5744
+define i32 @test1({i16, i32} *%P) {
+  %P2 = getelementptr {i16, i32}, {i16, i32} *%P, i32 0, i32 0
+  store i16 42, i16* %P2
+
+  %P3 = getelementptr {i16, i32}, {i16, i32} *%P, i32 0, i32 1
+  %V = load i32, i32* %P3
+  ret i32 %V
+}
+

Added: llvm/trunk/test/Transforms/NewGVN/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/crash.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,201 @@
+; RUN: opt -newgvn -disable-output < %s
+
+; PR5631
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0"
+
+define i32* @test1(i8* %name, i32 %namelen, i32* %o, i32 %expected_type) nounwind ssp {
+entry:
+  br i1 undef, label %if.end13, label %while.body.preheader
+
+
+if.end13:                                         ; preds = %entry
+  br label %while.body.preheader
+
+while.body.preheader:                             ; preds = %if.end13, %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.backedge, %while.body.preheader
+  %o.addr.0 = phi i32* [ undef, %while.body.preheader ], [ %o.addr.0.be, %while.body.backedge ] ; <i32*> [#uses=2]
+  br i1 false, label %return.loopexit, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %while.body
+  %tmp20 = bitcast i32* %o.addr.0 to i32*         ; <i32*> [#uses=1]
+  %tmp22 = load i32, i32* %tmp20                       ; <i32> [#uses=0]
+  br i1 undef, label %land.lhs.true24, label %if.end31
+
+land.lhs.true24:                                  ; preds = %lor.lhs.false
+  %call28 = call i32* @parse_object(i8* undef) nounwind ; <i32*> [#uses=0]
+  br i1 undef, label %return.loopexit, label %if.end31
+
+if.end31:                                         ; preds = %land.lhs.true24, %lor.lhs.false
+  br i1 undef, label %return.loopexit, label %if.end41
+
+if.end41:                                         ; preds = %if.end31
+  %tmp43 = bitcast i32* %o.addr.0 to i32*         ; <i32*> [#uses=1]
+  %tmp45 = load i32, i32* %tmp43                       ; <i32> [#uses=0]
+  br i1 undef, label %if.then50, label %if.else
+
+if.then50:                                        ; preds = %if.end41
+  %tmp53 = load i32*, i32** undef                       ; <i32*> [#uses=1]
+  br label %while.body.backedge
+
+if.else:                                          ; preds = %if.end41
+  br i1 undef, label %if.then62, label %if.else67
+
+if.then62:                                        ; preds = %if.else
+  br label %while.body.backedge
+
+while.body.backedge:                              ; preds = %if.then62, %if.then50
+  %o.addr.0.be = phi i32* [ %tmp53, %if.then50 ], [ undef, %if.then62 ] ; <i32*> [#uses=1]
+  br label %while.body
+
+if.else67:                                        ; preds = %if.else
+  ret i32* null
+
+return.loopexit:                                  ; preds = %if.end31, %land.lhs.true24, %while.body
+  ret i32* undef
+}
+
+declare i32* @parse_object(i8*)
+
+
+
+
+
+
+%struct.attribute_spec = type { i8*, i32, i32, i8, i8, i8 }
+
+@attribute_tables = external global [4 x %struct.attribute_spec*] ; <[4 x %struct.attribute_spec*]*> [#uses=2]
+
+define void @test2() nounwind {
+entry:
+  br label %bb69.i
+
+bb69.i:                                           ; preds = %bb66.i, %entry
+  %tmp4 = getelementptr inbounds [4 x %struct.attribute_spec*], [4 x %struct.attribute_spec*]* @attribute_tables, i32 0, i32 undef ; <%struct.attribute_spec**> [#uses=2]
+  %tmp3 = load %struct.attribute_spec*, %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1]
+  br label %bb65.i
+
+bb65.i:                                           ; preds = %bb64.i, %bb69.i
+  %storemerge6.i = phi i32 [ 1, %bb64.i ], [ 0, %bb69.i ] ; <i32> [#uses=2]
+  %scevgep14 = getelementptr inbounds %struct.attribute_spec, %struct.attribute_spec* %tmp3, i32 %storemerge6.i, i32 0 ; <i8**> [#uses=1]
+  %tmp2 = load i8*, i8** %scevgep14, align 4           ; <i8*> [#uses=0]
+  %tmp = load %struct.attribute_spec*, %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1]
+  %scevgep1516 = getelementptr inbounds %struct.attribute_spec, %struct.attribute_spec* %tmp, i32 %storemerge6.i, i32 0 ; <i8**> [#uses=0]
+  unreachable
+
+bb64.i:                                           ; Unreachable
+  br label %bb65.i
+
+bb66.i:                                           ; Unreachable
+  br label %bb69.i
+}
+
+
+
+; rdar://7438974
+
+@g = external global i64, align 8
+
+define i32* @test3() {
+do.end17.i:
+  %tmp18.i = load i7*, i7** undef
+  %tmp1 = bitcast i7* %tmp18.i to i8*
+  br i1 undef, label %do.body36.i, label %if.then21.i
+
+if.then21.i:
+  %tmp2 = bitcast i7* %tmp18.i to i8*
+  ret i32* undef
+
+do.body36.i:
+  %ivar38.i = load i64, i64* @g 
+  %tmp3 = bitcast i7* %tmp18.i to i8*
+  %add.ptr39.sum.i = add i64 %ivar38.i, 8
+  %tmp40.i = getelementptr inbounds i8, i8* %tmp3, i64 %add.ptr39.sum.i
+  %tmp4 = bitcast i8* %tmp40.i to i64*
+  %tmp41.i = load i64, i64* %tmp4
+  br i1 undef, label %if.then48.i, label %do.body57.i
+
+if.then48.i:
+  %call54.i = call i32 @foo2()
+  br label %do.body57.i
+
+do.body57.i:
+  %tmp58.i = load i7*, i7** undef
+  %ivar59.i = load i64, i64* @g
+  %tmp5 = bitcast i7* %tmp58.i to i8*
+  %add.ptr65.sum.i = add i64 %ivar59.i, 8
+  %tmp66.i = getelementptr inbounds i8, i8* %tmp5, i64 %add.ptr65.sum.i
+  %tmp6 = bitcast i8* %tmp66.i to i64*
+  %tmp67.i = load i64, i64* %tmp6
+  ret i32* undef
+}
+
+declare i32 @foo2()
+
+
+
+define i32 @test4() {
+entry:
+  ret i32 0
+  
+dead:
+  %P2 = getelementptr i32, i32 *%P2, i32 52
+  %Q2 = getelementptr i32, i32 *%Q2, i32 52
+  store i32 4, i32* %P2
+  %A = load i32, i32* %Q2
+  br i1 true, label %dead, label %dead2
+  
+dead2:
+  ret i32 %A
+}
+
+
+; PR9841
+define fastcc i8 @test5(i8* %P) nounwind {
+entry:
+  %0 = load i8, i8* %P, align 2
+
+  %Q = getelementptr i8, i8* %P, i32 1
+  %1 = load i8, i8* %Q, align 1
+  ret i8 %1
+}
+
+
+; Test that a GEP in an unreachable block with the following form doesn't crash
+; GVN:
+;
+;    %x = gep %some.type %x, ...
+
+%struct.type = type { i64, i32, i32 }
+
+define fastcc void @func() nounwind uwtable ssp align 2 {
+entry:
+  br label %reachable.bb
+
+;; Unreachable code.
+
+unreachable.bb:
+  %gep.val = getelementptr inbounds %struct.type, %struct.type* %gep.val, i64 1
+  br i1 undef, label %u2.bb, label %u1.bb
+
+u1.bb:
+  %tmp1 = getelementptr inbounds %struct.type, %struct.type* %gep.val, i64 0, i32 0
+  store i64 -1, i64* %tmp1, align 8
+  br label %unreachable.bb
+
+u2.bb:
+  %0 = load i32, i32* undef, align 4
+  %conv.i.i.i.i.i = zext i32 %0 to i64
+  br label %u2.bb
+
+;; Reachable code.
+
+reachable.bb:
+  br label %r1.bb
+
+r1.bb:
+  br label %u2.bb
+}