[llvm] r358552 - Revert "Temporarily Revert "Add basic loop fusion pass.""

Eric Christopher via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 16 21:53:01 PDT 2019


Added: llvm/trunk/test/Transforms/CodeExtractor/live_shrink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/live_shrink.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/live_shrink.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/live_shrink.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt -S -partial-inliner  -skip-partial-inlining-cost-analysis  < %s |   FileCheck %s
+; RUN: opt -S -passes=partial-inliner  -skip-partial-inlining-cost-analysis  < %s   | FileCheck %s
+
+%class.A = type { i32 }
+@cond = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: uwtable
+define void @_Z3foov() local_unnamed_addr  {
+bb:
+  %tmp = alloca %class.A, align 4
+  %tmp1 = bitcast %class.A* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1)
+  %tmp2 = load i32, i32* @cond, align 4, !tbaa !2
+  %tmp3 = icmp eq i32 %tmp2, 0
+  br i1 %tmp3, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) 
+
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr 
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) 
+
+; Function Attrs: uwtable
+define void @_Z3goov() local_unnamed_addr  {
+; CHECK-LABEL: @_Z3goov()
+bb:
+; CHECK: bb:
+; CHECK-NOT: alloca
+; CHECK-NOT: bitcast
+; CHECK-NOT: llvm.lifetime
+; CHECK: br i1
+; CHECK: codeRepl.i:
+; CHECK: call void @_Z3foov.1.
+
+  tail call void @_Z3foov()
+  ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foov.1.
+; CHECK: newFuncRoot:
+; CHECK-NEXT:  %tmp = alloca %class.A
+; CHECK-NEXT:  %tmp1 = bitcast %class.A* %tmp to i8*
+; CHECK-NEXT:  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1)
+; CHECK:  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+; CHECK-NEXT:  br label %bb5.exitStub
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
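The CHECK lines above spell out the intent of this test: the cold call to @_ZN1A7memfuncEv is outlined by the partial inliner, and the alloca together with its lifetime markers is shrunk into the outlined function, so the copy inlined into @_Z3goov contains no alloca, bitcast or lifetime call at all. Roughly, the outlined function those CHECK lines describe has the following shape (an illustrative sketch only; the exact name suffix, block layout and labels are chosen by the CodeExtractor):

define internal void @_Z3foov.1.bb4() {
newFuncRoot:
  %tmp = alloca %class.A, align 4
  %tmp1 = bitcast %class.A* %tmp to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1)
  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
  br label %bb5.exitStub

bb5.exitStub:                                     ; return stub added by the extractor
  ret void
}

The live_shrink_gep and live_shrink_multiple tests below check the same shrinking when the pointer is produced by a getelementptr instead of a bitcast, and when two allocas are involved.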

Added: llvm/trunk/test/Transforms/CodeExtractor/live_shrink_gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/live_shrink_gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/live_shrink_gep.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/live_shrink_gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,66 @@
+; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis  < %s   | FileCheck %s
+; RUN: opt -S -passes=partial-inliner  -skip-partial-inlining-cost-analysis < %s   | FileCheck %s
+
+%class.A = type { i8 }
+
+@cond = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: uwtable
+define void @_Z3foov() local_unnamed_addr  {
+bb:
+  %tmp = alloca %class.A, align 1
+  %tmp1 = getelementptr inbounds %class.A, %class.A* %tmp, i64 0, i32 0
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %tmp1) 
+  %tmp2 = load i32, i32* @cond, align 4, !tbaa !2
+  %tmp3 = icmp eq i32 %tmp2, 0
+  br i1 %tmp3, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %tmp1) 
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) 
+
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr 
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) 
+
+; Function Attrs: uwtable
+define void @_Z3goov() local_unnamed_addr  {
+; CHECK-LABEL: @_Z3goov()
+bb:
+; CHECK: bb:
+; CHECK-NOT: alloca
+; CHECK-NOT: getelementptr
+; CHECK-NOT: llvm.lifetime
+; CHECK: br i1
+; CHECK: codeRepl.i:
+; CHECK: call void @_Z3foov.1.
+  tail call void @_Z3foov()
+  ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foov.1.
+; CHECK: newFuncRoot:
+; CHECK-NEXT:  %tmp = alloca %class.A
+; CHECK-NEXT:  %tmp1 = getelementptr
+; CHECK-NEXT:  call void @llvm.lifetime.start.p0i8
+; CHECK:  call void @llvm.lifetime.end.p0i8
+; CHECK-NEXT:  br label %bb5.exitStub
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}

Added: llvm/trunk/test/Transforms/CodeExtractor/live_shrink_hoist.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/live_shrink_hoist.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/live_shrink_hoist.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/live_shrink_hoist.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,70 @@
+; RUN: opt -S -partial-inliner -max-num-inline-blocks=3 -skip-partial-inlining-cost-analysis  < %s |   FileCheck %s
+; RUN: opt -S -passes=partial-inliner -max-num-inline-blocks=2  -skip-partial-inlining-cost-analysis < %s   | FileCheck %s
+
+%class.A = type { i32 }
+
+@cond = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: uwtable
+define void @_Z3foov() local_unnamed_addr  {
+bb:
+  %tmp = alloca %class.A, align 4
+  %tmp1 = bitcast %class.A* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) 
+  %tmp2 = load i32, i32* @cond, align 4, !tbaa !2
+  %tmp3 = icmp eq i32 %tmp2, 0
+  br i1 %tmp3, label %bb4, label %bb9
+
+bb4:                                              ; preds = %bb
+  %foo = icmp eq i32 %tmp2, 0
+  br i1 %foo, label %bb5, label %bb9
+
+bb5:                                              ; preds = %bb4
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  %tmp5 = getelementptr inbounds %class.A, %class.A* %tmp, i64 0, i32 0
+  %tmp6 = load i32, i32* %tmp5, align 4, !tbaa !6
+  %tmp7 = icmp sgt i32 %tmp6, 0
+  br i1 %tmp7, label %bb9, label %bb8
+
+bb8:                                              ; preds = %bb4
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  br label %bb9
+
+bb9:                                              ; preds = %bb8, %bb4, %bb
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) 
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) 
+
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr 
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) 
+
+; Function Attrs: uwtable
+define void @_Z3goov() local_unnamed_addr  {
+bb:
+  tail call void @_Z3foov()
+  ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foov.1.
+; CHECK: bb9:
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1)
+; CHECK:  br label %.exitStub
+
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = !{!7, !3, i64 0}
+!7 = !{!"_ZTS1A", !3, i64 0}

Added: llvm/trunk/test/Transforms/CodeExtractor/live_shrink_multiple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/live_shrink_multiple.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/live_shrink_multiple.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/live_shrink_multiple.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,66 @@
+; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis < %s   | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s   | FileCheck %s
+
+%class.A = type { i32 }
+@cond = local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: uwtable
+define void @_Z3foov() local_unnamed_addr  {
+bb:
+  %tmp = alloca %class.A, align 4
+  %tmp1 = alloca %class.A, align 4
+  %tmp2 = bitcast %class.A* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp2) 
+  %tmp3 = bitcast %class.A* %tmp1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp3) 
+  %tmp4 = load i32, i32* @cond, align 4, !tbaa !2
+  %tmp5 = icmp eq i32 %tmp4, 0
+  br i1 %tmp5, label %bb6, label %bb7
+
+bb6:                                              ; preds = %bb
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  br label %bb7
+
+bb7:                                              ; preds = %bb6, %bb
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp3) 
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp2) 
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) 
+
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr 
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) 
+
+; Function Attrs: uwtable
+define void @_Z3goov() local_unnamed_addr  {
+bb:
+  tail call void @_Z3foov()
+  ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foov.1.
+; CHECK: newFuncRoot:
+; CHECK-NEXT:  alloca 
+; CHECK-NEXT:  bitcast 
+; CHECK-NEXT:  call void @llvm.lifetime.start.p0i8
+; CHECK-NEXT:  alloca
+; CHECK-NEXT:  bitcast 
+; CHECK-NEXT:  call void @llvm.lifetime.start.p0i8
+; CHECK:  call void @llvm.lifetime.end.p0i8
+; CHECK-NEXT:  call void @llvm.lifetime.end.p0i8
+; CHECK-NEXT:  br label {{.*}}exitStub
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}

Added: llvm/trunk/test/Transforms/CodeExtractor/live_shrink_unsafe.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/live_shrink_unsafe.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/live_shrink_unsafe.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/live_shrink_unsafe.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,94 @@
+; The expected behavior of this test is likely to change when the partial
+; inlining legality check is enhanced.
+
+; RUN: opt -S -partial-inliner -skip-partial-inlining-cost-analysis  < %s   | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -skip-partial-inlining-cost-analysis < %s |   FileCheck %s
+
+%class.A = type { i32 }
+
+@cond = local_unnamed_addr global i32 0, align 4
+@condptr = external local_unnamed_addr global i32*, align 8
+
+; Function Attrs: uwtable
+define void @_Z3foo_unknown_mem_accessv() local_unnamed_addr  {
+bb:
+  %tmp = alloca %class.A, align 4
+  %tmp1 = alloca %class.A, align 4
+  %tmp2 = bitcast %class.A* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp2) 
+  %tmp3 = bitcast %class.A* %tmp1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp3) 
+  %tmp4 = load i32*, i32** @condptr, align 8, !tbaa !2
+  %tmp5 = load i32, i32* %tmp4, align 4, !tbaa !6
+  %tmp6 = icmp eq i32 %tmp5, 0
+  br i1 %tmp6, label %bb7, label %bb8
+
+bb7:                                              ; preds = %bb
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp3) 
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp2) 
+  ret void
+}
+
+declare void @_Z3barv() local_unnamed_addr
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) 
+declare void @_ZN1A7memfuncEv(%class.A*) local_unnamed_addr 
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) 
+
+define void @_Z3foo_unknown_calli(i32 %arg) local_unnamed_addr {
+bb:
+  %tmp = alloca %class.A, align 4
+  %tmp1 = bitcast %class.A* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp1) 
+  tail call void @_Z3barv()
+  %tmp2 = icmp eq i32 %arg, 0
+  br i1 %tmp2, label %bb3, label %bb4
+
+bb3:                                              ; preds = %bb
+  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
+  br label %bb4
+
+bb4:                                              ; preds = %bb3, %bb
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp1) 
+  ret void
+}
+
+define void @_Z3goov() local_unnamed_addr  {
+; CHECK-LABEL: @_Z3goov
+; CHECK-NEXT: bb:
+; CHECK: alloca
+; CHECK: lifetime
+bb:
+  call void @_Z3foo_unknown_mem_accessv()
+  %tmp = load i32, i32* @cond, align 4, !tbaa !2
+  tail call void @_Z3foo_unknown_calli(i32 %tmp)
+  ret void
+}
+
+; CHECK-LABEL: define internal void @_Z3foo_unknown_calli.1_bb3
+; CHECK: newFuncRoot:
+; CHECK-NEXT: br label %bb3
+
+; CHECK: bb4.exitStub:
+; CHECK-NEXT: ret void
+
+; CHECK: bb3:
+; CHECK-NOT: lifetime.end
+; CHECK: br label %bb4.exitStub
+
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !4, i64 0}
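As the note at the top of the file says, this test is about the legality side of the same shrinking. In @_Z3foo_unknown_mem_accessv the branch condition is loaded through @condptr (memory the pass cannot reason about), and @_Z3foo_unknown_calli makes an unknown call to @_Z3barv before the branch, so the allocas and lifetime markers are not pulled into the outlined code and @_Z3goov keeps its own alloca and lifetime intrinsics, exactly as the CHECKs at @_Z3goov require. For the second function only bb3 is extracted; the CHECK lines describe an outlined function of roughly this shape (the parameter list here is an assumption for illustration; the extractor decides what actually gets passed):

define internal void @_Z3foo_unknown_calli.1_bb3(%class.A* %tmp) {
newFuncRoot:
  br label %bb3

bb4.exitStub:                                     ; preds = %bb3
  ret void

bb3:                                              ; preds = %newFuncRoot
  call void @_ZN1A7memfuncEv(%class.A* nonnull %tmp)
  br label %bb4.exitStub
}

The lifetime.end calls stay behind in the caller, which is what the CHECK-NOT inside bb3 verifies.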

Added: llvm/trunk/test/Transforms/CodeExtractor/unreachable-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/unreachable-block.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/unreachable-block.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/unreachable-block.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt -S -partial-inliner %s | FileCheck %s
+
+; CHECK-LABEL: define void @dipsy(
+; CHECK-NEXT:   call void @tinkywinky.1.ontrue()
+; CHECK-NEXT:   call void @patatuccio()
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+
+; CHECK-LABEL: define internal void @tinkywinky.1.ontrue() {
+; CHECK-NEXT: newFuncRoot:
+; CHECK-NEXT:   br label %ontrue
+; CHECK: onfalse{{.*}}:
+; CHECK-NEXT:   ret void
+; CHECK: ontrue:
+; CHECK-NEXT:   call void @patatino()
+; CHECK-NEXT:   br label %onfalse{{.*}}
+; CHECK-NEXT: }
+
+declare void @patatino()
+declare void @patatuccio()
+
+define fastcc void @tinkywinky() {
+  br i1 true, label %ontrue, label %onfalse
+ontrue:
+  call void @patatino()
+  br label %onfalse
+onfalse:
+  call void @patatuccio()
+  ret void
+cantreachme:
+  ret void
+}
+define void @dipsy() {
+  call fastcc void @tinkywinky()
+  ret void
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,511 @@
+; RUN: opt < %s -codegenprepare | llvm-dis
+; PR3113
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define fastcc i32 @ascii2flt(i8* %str) nounwind {
+entry:
+	br label %bb2.i
+
+bb2.i:		; preds = %bb4.i.bb2.i_crit_edge, %entry
+	br i1 false, label %bb4.i, label %base2flt.exit
+
+bb4.i:		; preds = %bb2.i
+	br i1 false, label %bb11.i, label %bb4.i.bb2.i_crit_edge
+
+bb4.i.bb2.i_crit_edge:		; preds = %bb4.i
+	br label %bb2.i
+
+bb11.i:		; preds = %bb4.i
+	br label %bb11.i.base2flt.exit204_crit_edge
+
+bb11.i.base2flt.exit204_crit_edge:		; preds = %bb11.i
+	br label %base2flt.exit204
+
+bb11.i.bb7.i197_crit_edge:		; No predecessors!
+	br label %bb7.i197
+
+base2flt.exit:		; preds = %bb2.i
+	br label %base2flt.exit.base2flt.exit204_crit_edge
+
+base2flt.exit.base2flt.exit204_crit_edge:		; preds = %base2flt.exit
+	br label %base2flt.exit204
+
+base2flt.exit.bb7.i197_crit_edge:		; No predecessors!
+	br label %bb7.i197
+
+bb10.i196:		; preds = %bb7.i197
+	br label %bb10.i196.base2flt.exit204_crit_edge
+
+bb10.i196.base2flt.exit204_crit_edge:		; preds = %bb7.i197, %bb10.i196
+	br label %base2flt.exit204
+
+bb10.i196.bb7.i197_crit_edge:		; No predecessors!
+	br label %bb7.i197
+
+bb7.i197:		; preds = %bb10.i196.bb7.i197_crit_edge, %base2flt.exit.bb7.i197_crit_edge, %bb11.i.bb7.i197_crit_edge
+	%.reg2mem.0 = phi i32 [ 0, %base2flt.exit.bb7.i197_crit_edge ], [ %.reg2mem.0, %bb10.i196.bb7.i197_crit_edge ], [ 0, %bb11.i.bb7.i197_crit_edge ]		; <i32> [#uses=1]
+	br i1 undef, label %bb10.i196.base2flt.exit204_crit_edge, label %bb10.i196
+
+base2flt.exit204:		; preds = %bb10.i196.base2flt.exit204_crit_edge, %base2flt.exit.base2flt.exit204_crit_edge, %bb11.i.base2flt.exit204_crit_edge
+	br i1 false, label %base2flt.exit204.bb8_crit_edge, label %bb
+
+base2flt.exit204.bb8_crit_edge:		; preds = %base2flt.exit204
+	br label %bb8
+
+bb:		; preds = %base2flt.exit204
+	br i1 false, label %bb.bb18_crit_edge, label %bb1.i
+
+bb.bb18_crit_edge:		; preds = %bb9, %bb
+	br label %bb18
+
+bb1.i:		; preds = %bb
+	br i1 false, label %bb1.i.bb7_crit_edge, label %bb1.i158
+
+bb1.i.bb7_crit_edge.loopexit:		; preds = %bb2.i164
+	br label %bb1.i.bb7_crit_edge
+
+bb1.i.bb7_crit_edge:		; preds = %bb1.i.bb7_crit_edge.loopexit, %bb1.i
+	br label %bb7.preheader
+
+bb1.i158:		; preds = %bb1.i
+	br i1 false, label %bb1.i158.bb10.i179_crit_edge, label %bb1.i158.bb2.i164_crit_edge
+
+bb1.i158.bb2.i164_crit_edge:		; preds = %bb1.i158
+	br label %bb2.i164
+
+bb1.i158.bb10.i179_crit_edge:		; preds = %bb1.i158
+	br label %bb10.i179
+
+bb2.i164:		; preds = %bb4.i166.bb2.i164_crit_edge, %bb1.i158.bb2.i164_crit_edge
+	br i1 false, label %bb4.i166, label %bb1.i.bb7_crit_edge.loopexit
+
+bb4.i166:		; preds = %bb2.i164
+	br i1 false, label %bb4.i166.bb11.i172_crit_edge, label %bb4.i166.bb2.i164_crit_edge
+
+bb4.i166.bb2.i164_crit_edge:		; preds = %bb4.i166
+	br label %bb2.i164
+
+bb4.i166.bb11.i172_crit_edge:		; preds = %bb4.i166
+	br label %bb11.i172
+
+bb11.i172:		; preds = %bb10.i179.bb11.i172_crit_edge, %bb4.i166.bb11.i172_crit_edge
+	br label %bb7.preheader
+
+bb10.i179:		; preds = %bb9.i182, %bb1.i158.bb10.i179_crit_edge
+	br i1 false, label %bb7.i180, label %bb10.i179.bb11.i172_crit_edge
+
+bb10.i179.bb11.i172_crit_edge:		; preds = %bb10.i179
+	br label %bb11.i172
+
+bb7.i180:		; preds = %bb10.i179
+	br i1 false, label %bb7.i180.bb7_crit_edge, label %bb9.i182
+
+bb7.i180.bb7_crit_edge:		; preds = %bb7.i180
+	br label %bb7.preheader
+
+bb7.preheader:		; preds = %bb7.i180.bb7_crit_edge, %bb11.i172, %bb1.i.bb7_crit_edge
+	br label %bb7
+
+bb9.i182:		; preds = %bb7.i180
+	br label %bb10.i179
+
+bb7:		; preds = %addflt.exit114, %bb7.preheader
+	switch i8 0, label %bb4 [
+		i8 0, label %bb7.bb8_crit_edge
+		i8 46, label %bb7.bb8_crit_edge
+	]
+
+bb7.bb8_crit_edge:		; preds = %bb7, %bb7
+	br label %bb8
+
+bb4:		; preds = %bb7
+	br i1 false, label %bb18.loopexit1, label %bb1.i5
+
+bb1.i5:		; preds = %bb4
+	br i1 false, label %bb1.i5.mulflt.exit157_crit_edge, label %bb3.i147
+
+bb1.i5.mulflt.exit157_crit_edge:		; preds = %bb5.i148, %bb1.i5
+	br label %mulflt.exit157
+
+bb3.i147:		; preds = %bb1.i5
+	br i1 false, label %bb3.i147.mulflt.exit157_crit_edge, label %bb5.i148
+
+bb3.i147.mulflt.exit157_crit_edge:		; preds = %bb8.i150, %bb3.i147
+	br label %mulflt.exit157
+
+bb5.i148:		; preds = %bb3.i147
+	br i1 false, label %bb1.i5.mulflt.exit157_crit_edge, label %bb7.i149
+
+bb7.i149:		; preds = %bb5.i148
+	br i1 false, label %bb8.i150, label %bb7.i149.bb12.i154_crit_edge
+
+bb7.i149.bb12.i154_crit_edge:		; preds = %bb7.i149
+	br label %bb12.i154
+
+bb8.i150:		; preds = %bb7.i149
+	br i1 false, label %bb3.i147.mulflt.exit157_crit_edge, label %bb10.i151
+
+bb10.i151:		; preds = %bb8.i150
+	br label %bb12.i154
+
+bb12.i154:		; preds = %bb10.i151, %bb7.i149.bb12.i154_crit_edge
+	br label %mulflt.exit157
+
+mulflt.exit157:		; preds = %bb12.i154, %bb3.i147.mulflt.exit157_crit_edge, %bb1.i5.mulflt.exit157_crit_edge
+	br i1 false, label %mulflt.exit157.base2flt.exit144_crit_edge, label %bb1.i115
+
+mulflt.exit157.base2flt.exit144_crit_edge.loopexit:		; preds = %bb2.i121
+	br label %mulflt.exit157.base2flt.exit144_crit_edge
+
+mulflt.exit157.base2flt.exit144_crit_edge:		; preds = %mulflt.exit157.base2flt.exit144_crit_edge.loopexit, %mulflt.exit157
+	br label %base2flt.exit144
+
+bb1.i115:		; preds = %mulflt.exit157
+	br i1 false, label %bb1.i115.bb10.i136_crit_edge, label %bb1.i115.bb2.i121_crit_edge
+
+bb1.i115.bb2.i121_crit_edge:		; preds = %bb1.i115
+	br label %bb2.i121
+
+bb1.i115.bb10.i136_crit_edge:		; preds = %bb1.i115
+	br label %bb10.i136
+
+bb2.i121:		; preds = %bb4.i123.bb2.i121_crit_edge, %bb1.i115.bb2.i121_crit_edge
+	br i1 false, label %bb4.i123, label %mulflt.exit157.base2flt.exit144_crit_edge.loopexit
+
+bb4.i123:		; preds = %bb2.i121
+	br i1 false, label %bb4.i123.bb11.i129_crit_edge, label %bb4.i123.bb2.i121_crit_edge
+
+bb4.i123.bb2.i121_crit_edge:		; preds = %bb4.i123
+	br label %bb2.i121
+
+bb4.i123.bb11.i129_crit_edge:		; preds = %bb4.i123
+	br label %bb11.i129
+
+bb11.i129:		; preds = %bb10.i136.bb11.i129_crit_edge, %bb4.i123.bb11.i129_crit_edge
+	br label %base2flt.exit144
+
+bb10.i136:		; preds = %bb9.i139, %bb1.i115.bb10.i136_crit_edge
+	br i1 false, label %bb7.i137, label %bb10.i136.bb11.i129_crit_edge
+
+bb10.i136.bb11.i129_crit_edge:		; preds = %bb10.i136
+	br label %bb11.i129
+
+bb7.i137:		; preds = %bb10.i136
+	br i1 false, label %bb7.i137.base2flt.exit144_crit_edge, label %bb9.i139
+
+bb7.i137.base2flt.exit144_crit_edge:		; preds = %bb7.i137
+	br label %base2flt.exit144
+
+bb9.i139:		; preds = %bb7.i137
+	br label %bb10.i136
+
+base2flt.exit144:		; preds = %bb7.i137.base2flt.exit144_crit_edge, %bb11.i129, %mulflt.exit157.base2flt.exit144_crit_edge
+	br i1 false, label %base2flt.exit144.addflt.exit114_crit_edge, label %bb3.i105
+
+base2flt.exit144.addflt.exit114_crit_edge:		; preds = %bb3.i105, %base2flt.exit144
+	br label %addflt.exit114
+
+bb3.i105:		; preds = %base2flt.exit144
+	br i1 false, label %base2flt.exit144.addflt.exit114_crit_edge, label %bb5.i106
+
+bb5.i106:		; preds = %bb3.i105
+	br i1 false, label %bb5.i106.bb9.i111_crit_edge, label %bb6.i107
+
+bb5.i106.bb9.i111_crit_edge:		; preds = %bb5.i106
+	br label %bb9.i111
+
+bb6.i107:		; preds = %bb5.i106
+	br i1 false, label %bb6.i107.addflt.exit114_crit_edge, label %bb8.i108
+
+bb6.i107.addflt.exit114_crit_edge:		; preds = %bb6.i107
+	br label %addflt.exit114
+
+bb8.i108:		; preds = %bb6.i107
+	br label %bb9.i111
+
+bb9.i111:		; preds = %bb8.i108, %bb5.i106.bb9.i111_crit_edge
+	br label %addflt.exit114
+
+addflt.exit114:		; preds = %bb9.i111, %bb6.i107.addflt.exit114_crit_edge, %base2flt.exit144.addflt.exit114_crit_edge
+	br label %bb7
+
+bb18.loopexit1:		; preds = %bb4
+	ret i32 -1
+
+bb18:		; preds = %bb8.bb18_crit_edge, %bb.bb18_crit_edge
+	ret i32 0
+
+bb8:		; preds = %bb7.bb8_crit_edge, %base2flt.exit204.bb8_crit_edge
+	br i1 false, label %bb9, label %bb8.bb18_crit_edge
+
+bb8.bb18_crit_edge:		; preds = %bb8
+	br label %bb18
+
+bb9:		; preds = %bb8
+	br i1 false, label %bb.bb18_crit_edge, label %bb1.i13
+
+bb1.i13:		; preds = %bb9
+	br i1 false, label %bb1.i13.base2flt.exit102_crit_edge, label %bb1.i73
+
+bb1.i13.base2flt.exit102_crit_edge.loopexit:		; preds = %bb2.i79
+	br label %bb1.i13.base2flt.exit102_crit_edge
+
+bb1.i13.base2flt.exit102_crit_edge:		; preds = %bb1.i13.base2flt.exit102_crit_edge.loopexit, %bb1.i13
+	br label %base2flt.exit102
+
+bb1.i73:		; preds = %bb1.i13
+	br i1 false, label %bb1.i73.bb10.i94_crit_edge, label %bb1.i73.bb2.i79_crit_edge
+
+bb1.i73.bb2.i79_crit_edge:		; preds = %bb1.i73
+	br label %bb2.i79
+
+bb1.i73.bb10.i94_crit_edge:		; preds = %bb1.i73
+	br label %bb10.i94
+
+bb2.i79:		; preds = %bb4.i81.bb2.i79_crit_edge, %bb1.i73.bb2.i79_crit_edge
+	br i1 false, label %bb4.i81, label %bb1.i13.base2flt.exit102_crit_edge.loopexit
+
+bb4.i81:		; preds = %bb2.i79
+	br i1 false, label %bb4.i81.bb11.i87_crit_edge, label %bb4.i81.bb2.i79_crit_edge
+
+bb4.i81.bb2.i79_crit_edge:		; preds = %bb4.i81
+	br label %bb2.i79
+
+bb4.i81.bb11.i87_crit_edge:		; preds = %bb4.i81
+	br label %bb11.i87
+
+bb11.i87:		; preds = %bb10.i94.bb11.i87_crit_edge, %bb4.i81.bb11.i87_crit_edge
+	br label %base2flt.exit102
+
+bb10.i94:		; preds = %bb9.i97, %bb1.i73.bb10.i94_crit_edge
+	br i1 false, label %bb7.i95, label %bb10.i94.bb11.i87_crit_edge
+
+bb10.i94.bb11.i87_crit_edge:		; preds = %bb10.i94
+	br label %bb11.i87
+
+bb7.i95:		; preds = %bb10.i94
+	br i1 false, label %bb7.i95.base2flt.exit102_crit_edge, label %bb9.i97
+
+bb7.i95.base2flt.exit102_crit_edge:		; preds = %bb7.i95
+	br label %base2flt.exit102
+
+bb9.i97:		; preds = %bb7.i95
+	br label %bb10.i94
+
+base2flt.exit102:		; preds = %bb7.i95.base2flt.exit102_crit_edge, %bb11.i87, %bb1.i13.base2flt.exit102_crit_edge
+	br i1 false, label %base2flt.exit102.mulflt.exit72_crit_edge, label %bb3.i62
+
+base2flt.exit102.mulflt.exit72_crit_edge:		; preds = %bb5.i63, %base2flt.exit102
+	br label %mulflt.exit72
+
+bb3.i62:		; preds = %base2flt.exit102
+	br i1 false, label %bb3.i62.mulflt.exit72_crit_edge, label %bb5.i63
+
+bb3.i62.mulflt.exit72_crit_edge:		; preds = %bb8.i65, %bb3.i62
+	br label %mulflt.exit72
+
+bb5.i63:		; preds = %bb3.i62
+	br i1 false, label %base2flt.exit102.mulflt.exit72_crit_edge, label %bb7.i64
+
+bb7.i64:		; preds = %bb5.i63
+	br i1 false, label %bb8.i65, label %bb7.i64.bb12.i69_crit_edge
+
+bb7.i64.bb12.i69_crit_edge:		; preds = %bb7.i64
+	br label %bb12.i69
+
+bb8.i65:		; preds = %bb7.i64
+	br i1 false, label %bb3.i62.mulflt.exit72_crit_edge, label %bb10.i66
+
+bb10.i66:		; preds = %bb8.i65
+	br label %bb12.i69
+
+bb12.i69:		; preds = %bb10.i66, %bb7.i64.bb12.i69_crit_edge
+	br label %mulflt.exit72
+
+mulflt.exit72:		; preds = %bb12.i69, %bb3.i62.mulflt.exit72_crit_edge, %base2flt.exit102.mulflt.exit72_crit_edge
+	br i1 false, label %mulflt.exit72.bb10.i58_crit_edge, label %bb3.i50
+
+mulflt.exit72.bb10.i58_crit_edge:		; preds = %bb3.i50, %mulflt.exit72
+	br label %bb10.i58
+
+bb3.i50:		; preds = %mulflt.exit72
+	br i1 false, label %mulflt.exit72.bb10.i58_crit_edge, label %bb5.i51
+
+bb5.i51:		; preds = %bb3.i50
+	br i1 false, label %bb5.i51.bb9.i56_crit_edge, label %bb6.i52
+
+bb5.i51.bb9.i56_crit_edge:		; preds = %bb5.i51
+	br label %bb9.i56
+
+bb6.i52:		; preds = %bb5.i51
+	br i1 false, label %bb6.i52.bb10.i58_crit_edge, label %bb8.i53
+
+bb6.i52.bb10.i58_crit_edge:		; preds = %bb6.i52
+	br label %bb10.i58
+
+bb8.i53:		; preds = %bb6.i52
+	br label %bb9.i56
+
+bb9.i56:		; preds = %bb8.i53, %bb5.i51.bb9.i56_crit_edge
+	br label %bb15.preheader
+
+bb10.i58:		; preds = %bb6.i52.bb10.i58_crit_edge, %mulflt.exit72.bb10.i58_crit_edge
+	br label %bb15.preheader
+
+bb15.preheader:		; preds = %bb10.i58, %bb9.i56
+	br label %bb15
+
+bb15:		; preds = %addflt.exit, %bb15.preheader
+	br i1 false, label %bb15.bb18.loopexit_crit_edge, label %bb12
+
+bb15.bb18.loopexit_crit_edge:		; preds = %bb15
+	br label %bb18.loopexit
+
+bb12:		; preds = %bb15
+	br i1 false, label %bb12.bb18.loopexit_crit_edge, label %bb1.i21
+
+bb12.bb18.loopexit_crit_edge:		; preds = %bb12
+	br label %bb18.loopexit
+
+bb1.i21:		; preds = %bb12
+	br i1 false, label %bb1.i21.mulflt.exit47_crit_edge, label %bb3.i37
+
+bb1.i21.mulflt.exit47_crit_edge:		; preds = %bb5.i38, %bb1.i21
+	br label %mulflt.exit47
+
+bb3.i37:		; preds = %bb1.i21
+	br i1 false, label %bb3.i37.mulflt.exit47_crit_edge, label %bb5.i38
+
+bb3.i37.mulflt.exit47_crit_edge:		; preds = %bb8.i40, %bb3.i37
+	br label %mulflt.exit47
+
+bb5.i38:		; preds = %bb3.i37
+	br i1 false, label %bb1.i21.mulflt.exit47_crit_edge, label %bb7.i39
+
+bb7.i39:		; preds = %bb5.i38
+	br i1 false, label %bb8.i40, label %bb7.i39.bb12.i44_crit_edge
+
+bb7.i39.bb12.i44_crit_edge:		; preds = %bb7.i39
+	br label %bb12.i44
+
+bb8.i40:		; preds = %bb7.i39
+	br i1 false, label %bb3.i37.mulflt.exit47_crit_edge, label %bb10.i41
+
+bb10.i41:		; preds = %bb8.i40
+	br label %bb12.i44
+
+bb12.i44:		; preds = %bb10.i41, %bb7.i39.bb12.i44_crit_edge
+	br label %mulflt.exit47
+
+mulflt.exit47:		; preds = %bb12.i44, %bb3.i37.mulflt.exit47_crit_edge, %bb1.i21.mulflt.exit47_crit_edge
+	br i1 false, label %mulflt.exit47.base2flt.exit34_crit_edge, label %bb1.i15
+
+mulflt.exit47.base2flt.exit34_crit_edge.loopexit:		; preds = %bb2.i20
+	br label %mulflt.exit47.base2flt.exit34_crit_edge
+
+mulflt.exit47.base2flt.exit34_crit_edge:		; preds = %mulflt.exit47.base2flt.exit34_crit_edge.loopexit, %mulflt.exit47
+	br label %base2flt.exit34
+
+bb1.i15:		; preds = %mulflt.exit47
+	br i1 false, label %bb1.i15.bb10.i31_crit_edge, label %bb1.i15.bb2.i20_crit_edge
+
+bb1.i15.bb2.i20_crit_edge:		; preds = %bb1.i15
+	br label %bb2.i20
+
+bb1.i15.bb10.i31_crit_edge:		; preds = %bb1.i15
+	br label %bb10.i31
+
+bb2.i20:		; preds = %bb4.i22.bb2.i20_crit_edge, %bb1.i15.bb2.i20_crit_edge
+	br i1 false, label %bb4.i22, label %mulflt.exit47.base2flt.exit34_crit_edge.loopexit
+
+bb4.i22:		; preds = %bb2.i20
+	br i1 false, label %bb4.i22.bb11.i28_crit_edge, label %bb4.i22.bb2.i20_crit_edge
+
+bb4.i22.bb2.i20_crit_edge:		; preds = %bb4.i22
+	br label %bb2.i20
+
+bb4.i22.bb11.i28_crit_edge:		; preds = %bb4.i22
+	br label %bb11.i28
+
+bb11.i28:		; preds = %bb10.i31.bb11.i28_crit_edge, %bb4.i22.bb11.i28_crit_edge
+	br label %base2flt.exit34
+
+bb10.i31:		; preds = %bb9.i33, %bb1.i15.bb10.i31_crit_edge
+	br i1 false, label %bb7.i32, label %bb10.i31.bb11.i28_crit_edge
+
+bb10.i31.bb11.i28_crit_edge:		; preds = %bb10.i31
+	br label %bb11.i28
+
+bb7.i32:		; preds = %bb10.i31
+	br i1 false, label %bb7.i32.base2flt.exit34_crit_edge, label %bb9.i33
+
+bb7.i32.base2flt.exit34_crit_edge:		; preds = %bb7.i32
+	br label %base2flt.exit34
+
+bb9.i33:		; preds = %bb7.i32
+	br label %bb10.i31
+
+base2flt.exit34:		; preds = %bb7.i32.base2flt.exit34_crit_edge, %bb11.i28, %mulflt.exit47.base2flt.exit34_crit_edge
+	br i1 false, label %base2flt.exit34.mulflt.exit_crit_edge, label %bb3.i9
+
+base2flt.exit34.mulflt.exit_crit_edge:		; preds = %bb5.i10, %base2flt.exit34
+	br label %mulflt.exit
+
+bb3.i9:		; preds = %base2flt.exit34
+	br i1 false, label %bb3.i9.mulflt.exit_crit_edge, label %bb5.i10
+
+bb3.i9.mulflt.exit_crit_edge:		; preds = %bb8.i11, %bb3.i9
+	br label %mulflt.exit
+
+bb5.i10:		; preds = %bb3.i9
+	br i1 false, label %base2flt.exit34.mulflt.exit_crit_edge, label %bb7.i
+
+bb7.i:		; preds = %bb5.i10
+	br i1 false, label %bb8.i11, label %bb7.i.bb12.i_crit_edge
+
+bb7.i.bb12.i_crit_edge:		; preds = %bb7.i
+	br label %bb12.i
+
+bb8.i11:		; preds = %bb7.i
+	br i1 false, label %bb3.i9.mulflt.exit_crit_edge, label %bb10.i12
+
+bb10.i12:		; preds = %bb8.i11
+	br label %bb12.i
+
+bb12.i:		; preds = %bb10.i12, %bb7.i.bb12.i_crit_edge
+	br label %mulflt.exit
+
+mulflt.exit:		; preds = %bb12.i, %bb3.i9.mulflt.exit_crit_edge, %base2flt.exit34.mulflt.exit_crit_edge
+	br i1 false, label %mulflt.exit.addflt.exit_crit_edge, label %bb3.i
+
+mulflt.exit.addflt.exit_crit_edge:		; preds = %bb3.i, %mulflt.exit
+	br label %addflt.exit
+
+bb3.i:		; preds = %mulflt.exit
+	br i1 false, label %mulflt.exit.addflt.exit_crit_edge, label %bb5.i
+
+bb5.i:		; preds = %bb3.i
+	br i1 false, label %bb5.i.bb9.i_crit_edge, label %bb6.i
+
+bb5.i.bb9.i_crit_edge:		; preds = %bb5.i
+	br label %bb9.i
+
+bb6.i:		; preds = %bb5.i
+	br i1 false, label %bb6.i.addflt.exit_crit_edge, label %bb8.i
+
+bb6.i.addflt.exit_crit_edge:		; preds = %bb6.i
+	br label %addflt.exit
+
+bb8.i:		; preds = %bb6.i
+	br label %bb9.i
+
+bb9.i:		; preds = %bb8.i, %bb5.i.bb9.i_crit_edge
+	br label %addflt.exit
+
+addflt.exit:		; preds = %bb9.i, %bb6.i.addflt.exit_crit_edge, %mulflt.exit.addflt.exit_crit_edge
+	br label %bb15
+
+bb18.loopexit:		; preds = %bb12.bb18.loopexit_crit_edge, %bb15.bb18.loopexit_crit_edge
+	ret i32 0
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,82 @@
+; RUN: opt -S -codegenprepare -mtriple=aarch64-linux %s | FileCheck -enable-var-scope %s
+
+; Test for CodeGenPrepare::optimizeLoadExt(): simple case: two loads
+; feeding a phi that zext's each loaded value.
+define i32 @test_free_zext(i32* %ptr, i32* %ptr2, i32 %c) {
+; CHECK-LABEL: @test_free_zext(
+bb1:
+; CHECK: bb1:
+; CHECK: %[[T1:.*]] = load
+; CHECK: %[[A1:.*]] = and i32 %[[T1]], 65535
+  %load1 = load i32, i32* %ptr, align 4
+  %cmp = icmp ne i32 %c, 0
+  br i1 %cmp, label %bb2, label %bb3
+bb2:
+; CHECK: bb2:
+; CHECK: %[[T2:.*]] = load
+; CHECK: %[[A2:.*]] = and i32 %[[T2]], 65535
+  %load2 = load i32, i32* %ptr2, align 4
+  br label %bb3
+bb3:
+; CHECK: bb3:
+; CHECK: phi i32 [ %[[A1]], %bb1 ], [ %[[A2]], %bb2 ]
+  %phi = phi i32 [ %load1, %bb1 ], [ %load2, %bb2 ]
+  %and = and i32 %phi, 65535
+  ret i32 %and
+}
+
+; Test for CodeGenPrepare::optimizeLoadExt(): exercise all opcode
+; cases of active bit calculation.
+define i32 @test_free_zext2(i32* %ptr, i16* %dst16, i32* %dst32, i32 %c) {
+; CHECK-LABEL: @test_free_zext2(
+bb1:
+; CHECK: bb1:
+; CHECK: %[[T1:.*]] = load
+; CHECK: %[[A1:.*]] = and i32 %[[T1]], 65535
+  %load1 = load i32, i32* %ptr, align 4
+  %cmp = icmp ne i32 %c, 0
+  br i1 %cmp, label %bb2, label %bb4
+bb2:
+; CHECK: bb2:
+  %trunc = trunc i32 %load1 to i16
+  store i16 %trunc, i16* %dst16, align 2
+  br i1 %cmp, label %bb3, label %bb4
+bb3:
+; CHECK: bb3:
+  %shl = shl i32 %load1, 16
+  store i32 %shl, i32* %dst32, align 4
+  br label %bb4
+bb4:
+; CHECK: bb4:
+; CHECK-NOT: and
+; CHECK: ret i32 %[[A1]]
+  %and = and i32 %load1, 65535
+  ret i32 %and
+}
+
+; Test for CodeGenPrepare::optimizeLoadExt(): check case of zext-able
+; load feeding a phi in the same block.
+define void @test_free_zext3(i32* %ptr, i32* %ptr2, i32* %dst, i64* %c) {
+; CHECK-LABEL: @test_free_zext3(
+bb1:
+; CHECK: bb1:
+; CHECK: %[[T1:.*]] = load
+; CHECK: %[[A1:.*]] = and i32 %[[T1]], 65535
+  %load1 = load i32, i32* %ptr, align 4
+  br label %loop
+loop:
+; CHECK: loop:
+; CHECK: phi i32 [ %[[A1]], %bb1 ], [ %[[A2:.*]], %loop ]
+  %phi = phi i32 [ %load1, %bb1 ], [ %load2, %loop ]
+  %and = and i32 %phi, 65535
+  store i32 %and, i32* %dst, align 4
+  %idx = load volatile i64, i64* %c, align 4
+  %addr = getelementptr inbounds i32, i32* %ptr2, i64 %idx
+; CHECK: %[[T2:.*]] = load i32
+; CHECK: %[[A2]] = and i32 %[[T2]], 65535
+  %load2 = load i32, i32* %addr, align 4
+  %cmp = icmp ne i64 %idx, 0
+  br i1 %cmp, label %loop, label %end
+end:
+  ret void
+}
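Taken together, the three functions exercise CodeGenPrepare::optimizeLoadExt(): when every use of a loaded i32 only needs its low 16 bits (the and with 65535), a masking and is re-emitted directly after each load and the users, including the phis, are rewired to the masked values, so the original cross-block and folds away and the backend is free to pick a narrow, zero-extending load (the "free" zext of the file name). A minimal before/after sketch of the single-load case, paraphrasing the CHECK lines rather than quoting real pass output:

define i32 @low16_before(i32* %p) {
bb1:
  %v = load i32, i32* %p, align 4
  br label %bb2
bb2:
  %low = and i32 %v, 65535          ; only the low 16 bits are ever used
  ret i32 %low
}

define i32 @low16_after(i32* %p) {
bb1:
  %v = load i32, i32* %p, align 4
  %v.masked = and i32 %v, 65535     ; mask re-created right next to the load
  br label %bb2
bb2:
  ret i32 %v.masked                 ; the original and has become redundant
}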

Added: llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+
+%struct_type = type { [10000 x i32], i32, i32 }
+
+define void @test1(%struct_type** %s, i32 %n) {
+; CHECK-LABEL: test1
+entry:
+  %struct = load %struct_type*, %struct_type** %s
+  br label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK:     mov     w{{[0-9]+}}, #40000
+; CHECK-NOT: mov     w{{[0-9]+}}, #40004
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK:     str      w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+}
+
+define void @test2(%struct_type* %struct, i32 %n) {
+; CHECK-LABEL: test2
+entry:
+  %cmp = icmp eq %struct_type* %struct, null
+  br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK:     mov     w{{[0-9]+}}, #40000
+; CHECK-NOT: mov     w{{[0-9]+}}, #40004
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp1 = icmp slt i32 %phi, %n
+  br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK:     str      w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+}
+
+define void @test3(%struct_type* %s1, %struct_type* %s2, i1 %cond, i32 %n) {
+; CHECK-LABEL: test3
+entry:
+  br i1 %cond, label %if_true, label %if_end
+
+if_true:
+  br label %if_end
+
+if_end:
+  %struct = phi %struct_type* [ %s1, %entry ], [ %s2, %if_true ]
+  %cmp = icmp eq %struct_type* %struct, null
+  br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %if_end ], [ %i, %while_body ]
+; CHECK:     mov     w{{[0-9]+}}, #40000
+; CHECK-NOT: mov     w{{[0-9]+}}, #40004
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp1 = icmp slt i32 %phi, %n
+  br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK:     str      w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+}
+
+declare %struct_type* @foo()
+declare void @foo2()
+
+define void @test4(i32 %n) personality i32 (...)* @__FrameHandler {
+; CHECK-LABEL: test4
+entry:
+  br label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+  %struct = invoke %struct_type* @foo() to label %while_cond_x unwind label %cleanup
+
+while_cond_x:
+; CHECK:     mov     w{{[0-9]+}}, #40000
+; CHECK-NOT: mov     w{{[0-9]+}}, #40004
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  store i32 0, i32* %gep0
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK:     str      w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+
+cleanup:
+  %x10 = landingpad { i8*, i32 }
+          cleanup
+  call void @foo2()
+  resume { i8*, i32 } %x10
+}
+
+declare i32 @__FrameHandler(...)
+
+define void @test5([65536 x i32]** %s, i32 %n) {
+; CHECK-LABEL: test5
+entry:
+  %struct = load [65536 x i32]*, [65536 x i32]** %s
+  br label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+; CHECK:     mov     w{{[0-9]+}}, #14464
+; CHECK-NOT: mov     w{{[0-9]+}}, #14468
+  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20000
+  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20001
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK:     str      w{{[0-9]+}}, [x{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+}
+
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)
+
+define void @test_invariant_group(i32) {
+; CHECK-LABEL: test_invariant_group
+  br i1 undef, label %8, label %7
+
+; <label>:2:                                      ; preds = %8, %2
+  br i1 undef, label %2, label %7
+
+; <label>:3:                                      ; preds = %8
+  %4 = getelementptr inbounds i8, i8* %9, i32 40000
+  %5 = bitcast i8* %4 to i64*
+  br i1 undef, label %7, label %6
+
+; <label>:6:                                      ; preds = %3
+  store i64 1, i64* %5, align 8
+  br label %7
+
+; <label>:7:                                      ; preds = %6, %3, %2, %1
+  ret void
+
+; <label>:8:                                      ; preds = %1
+  %9 = call i8* @llvm.strip.invariant.group.p0i8(i8* nonnull undef)
+  %10 = icmp eq i32 %0, 0
+  br i1 %10, label %3, label %2
+}
+
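The arithmetic behind these checks: %struct_type is { [10000 x i32], i32, i32 }, so its second field sits at byte offset 10000 * 4 = 40000 and the third at 40004, both too large to fold into the immediate field of an AArch64 str. The mov #40000 / CHECK-NOT #40004 / str [x.., #4] pattern in each test therefore verifies that the two large-offset GEPs end up sharing one materialized base, with the neighbouring field addressed through a small folded offset, rather than each field getting its own large constant. A sketch of that split form, as an illustration of the idea and not the literal pass output:

define void @split_gep_sketch(%struct_type* %struct, i32 %i, i32 %phi) {
entry:
  %base   = bitcast %struct_type* %struct to i8*
  %common = getelementptr i8, i8* %base, i64 40000   ; the one large constant
  %f1     = bitcast i8* %common to i32*              ; field at offset 40000
  %f2.i8  = getelementptr i8, i8* %common, i64 4     ; neighbour at base + 4
  %f2     = bitcast i8* %f2.i8 to i32*
  store i32 %i, i32* %f1
  store i32 %phi, i32* %f2
  ret void
}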

Added: llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -codegenprepare -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown"
+
+define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; CHECK-LABEL: @sink_zext(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; CHECK-NEXT:    ret <8 x i16> [[RES_1]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+; CHECK-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
+; CHECK-NEXT:    ret <8 x i16> [[RES_2]]
+;
+entry:
+  %za = zext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %zb.1 = zext <8 x i8> %b to <8 x i16>
+  %res.1 = add <8 x i16> %za, %zb.1
+  ret <8 x i16> %res.1
+
+if.else:
+  %zb.2 = zext <8 x i8> %b to <8 x i16>
+  %res.2 = sub <8 x i16> %za, %zb.2
+  ret <8 x i16> %res.2
+}
+
+define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; CHECK-LABEL: @sink_sext(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; CHECK-NEXT:    ret <8 x i16> [[RES_1]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+; CHECK-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
+; CHECK-NEXT:    ret <8 x i16> [[RES_2]]
+;
+entry:
+  %za = sext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %zb.1 = sext <8 x i8> %b to <8 x i16>
+  %res.1 = add <8 x i16> %za, %zb.1
+  ret <8 x i16> %res.1
+
+if.else:
+  %zb.2 = sext <8 x i8> %b to <8 x i16>
+  %res.2 = sub <8 x i16> %za, %zb.2
+  ret <8 x i16> %res.2
+}
+
+define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; CHECK-LABEL: @do_not_sink_nonfree_zext(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; CHECK-NEXT:    ret <8 x i16> [[RES_1]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[ZB_2]]
+;
+entry:
+  %za = sext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %zb.1 = sext <8 x i8> %b to <8 x i16>
+  %res.1 = add <8 x i16> %za, %zb.1
+  ret <8 x i16> %res.1
+
+if.else:
+  %zb.2 = sext <8 x i8> %b to <8 x i16>
+  ret <8 x i16> %zb.2
+}
+
+define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; CHECK-LABEL: @do_not_sink_nonfree_sext(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; CHECK-NEXT:    ret <8 x i16> [[RES_1]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[ZB_2]]
+;
+entry:
+  %za = sext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %zb.1 = sext <8 x i8> %b to <8 x i16>
+  %res.1 = add <8 x i16> %za, %zb.1
+  ret <8 x i16> %res.1
+
+if.else:
+  %zb.2 = sext <8 x i8> %b to <8 x i16>
+  ret <8 x i16> %zb.2
+}
+
+; The masks used are suitable for umull, sink shufflevector to users.
+define <8 x i16> @sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @sink_shufflevector_umull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP0]], <8 x i8> [[S2]])
+; CHECK-NEXT:    ret <8 x i16> [[VMULL0]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[S4]])
+; CHECK-NEXT:    ret <8 x i16> [[VMULL1]]
+;
+entry:
+  %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
+  ret <8 x i16> %vmull0
+
+if.else:
+  %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
+  ret <8 x i16> %vmull1
+}
+
+; Both exts and their shufflevector operands can be sunk.
+define <8 x i16> @sink_shufflevector_ext_subadd(<16 x i8> %a, <16 x i8> %b) {
+entry:
+  %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %z1 = zext <8 x i8> %s1 to <8 x i16>
+  %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %z3 = sext <8 x i8> %s3 to <8 x i16>
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %z2 = zext <8 x i8> %s2 to <8 x i16>
+  %res1 = add <8 x i16> %z1, %z2
+  ret <8 x i16> %res1
+
+if.else:
+  %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %z4 = sext <8 x i8> %s4 to <8 x i16>
+  %res2 = sub <8 x i16> %z3, %z4
+  ret <8 x i16> %res2
+}
+
+
+declare void @user1(<8 x i16>)
+
+; Both exts and their shufflevector operands can be sunk.
+define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b) {
+entry:
+  %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %z1 = zext <8 x i8> %s1 to <8 x i16>
+  %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %z3 = sext <8 x i8> %s3 to <8 x i16>
+  call void @user1(<8 x i16> %z3)
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %z2 = zext <8 x i8> %s2 to <8 x i16>
+  %res1 = add <8 x i16> %z1, %z2
+  ret <8 x i16> %res1
+
+if.else:
+  %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %z4 = sext <8 x i8> %s4 to <8 x i16>
+  %res2 = sub <8 x i16> %z3, %z4
+  ret <8 x i16> %res2
+}
+
+
+; The masks used are not suitable for umull, do not sink.
+define <8 x i16> @no_sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @no_sink_shufflevector_umull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[S1]], <8 x i8> [[S2]])
+; CHECK-NEXT:    ret <8 x i16> [[VMULL0]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[S3]], <8 x i8> [[S4]])
+; CHECK-NEXT:    ret <8 x i16> [[VMULL1]]
+;
+entry:
+  %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7>
+  %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
+  ret <8 x i16> %vmull0
+
+if.else:
+  %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
+  %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
+  ret <8 x i16> %vmull1
+}
+
+
+; Function Attrs: nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) #2
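The common thread in these autogenerated tests: CodeGenPrepare duplicates ("sinks") operand-producing instructions that the AArch64 backend can fold into their user for free, namely vector zext/sext feeding an add or sub, and shufflevectors whose mask selects a clean low or high half of a <16 x i8> vector feeding umull, into each block that uses them, so the widening operation and its operand end up side by side. Judging from the negative test, a mask that is not a pure half (the repeated lanes in @no_sink_shufflevector_umull) disqualifies the shuffle. A minimal sketch of the qualifying half-vector pattern (illustrative only):

define <8 x i16> @umull_low_half_sketch(<16 x i8> %a, <16 x i8> %b) {
entry:
  ; masks <0..7> select the low halves, the operand form umull consumes directly
  %lo.a = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %lo.b = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %m = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %lo.a, <8 x i8> %lo.b)
  ret <8 x i16> %m
}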

Added: llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt -S -codegenprepare -mtriple=arm64-apple-ios7.0 %s | FileCheck %s
+
+%foo = type { i8 }
+
+define %foo @test_merge(i32 %in) {
+; CHECK-LABEL: @test_merge
+
+  ; CodeGenPrepare was requesting the EVT for { i8 } to determine
+  ; whether the insertvalue user of the trunc was legal. This
+  ; asserted.
+
+; CHECK: insertvalue %foo undef, i8 %byte, 0
+  %lobit = lshr i32 %in, 31
+  %byte = trunc i32 %lobit to i8
+  %struct = insertvalue %foo undef, i8 %byte, 0
+  ret %"foo" %struct
+}
+
+define i64* @test_merge_PR21548(i32 %a, i64* %p1, i64* %p2, i64* %p3) {
+; CHECK-LABEL: @test_merge_PR21548
+  %as = lshr i32 %a, 3
+  %Tr = trunc i32 %as to i1
+  br i1 %Tr, label %BB2, label %BB3
+
+BB2:
+  ; Similarly to above:
+  ; CodeGenPrepare was requesting the EVT for i8* to determine
+  ; whether the select user of the trunc was legal. This asserted.
+
+; CHECK: select i1 {{%.*}}, i64* %p1, i64* %p2
+  %p = select i1 %Tr, i64* %p1, i64* %p2
+  ret i64* %p
+
+BB3:
+  ret i64* %p3
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,95 @@
+;; AArch64 is arbitrarily chosen as a 32/64-bit RISC representative to show the transform in all tests.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=ARM64
+
+; AArch64 widens to 32-bit.
+
+define i32 @widen_switch_i16(i32 %a)  {
+entry:
+  %trunc = trunc i32 %a to i16
+  switch i16 %trunc, label %sw.default [
+    i16 1, label %sw.bb0
+    i16 -1, label %sw.bb1
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+  ret i32 %retval
+
+; ARM64-LABEL: @widen_switch_i16(
+; ARM64:       %0 = zext i16 %trunc to i32
+; ARM64-NEXT:  switch i32 %0, label %sw.default [
+; ARM64-NEXT:    i32 1, label %sw.bb0
+; ARM64-NEXT:    i32 65535, label %sw.bb1
+}
+
+; Widen to 32-bit from a smaller, non-native type.
+
+define i32 @widen_switch_i17(i32 %a)  {
+entry:
+  %trunc = trunc i32 %a to i17
+  switch i17 %trunc, label %sw.default [
+    i17 10, label %sw.bb0
+    i17 -1, label %sw.bb1
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+  ret i32 %retval
+
+; ARM64-LABEL: @widen_switch_i17(
+; ARM64:       %0 = zext i17 %trunc to i32
+; ARM64-NEXT:  switch i32 %0, label %sw.default [
+; ARM64-NEXT:    i32 10, label %sw.bb0
+; ARM64-NEXT:    i32 131071, label %sw.bb1
+}
+
+; If the switch condition is a sign-extended function argument, then the
+; condition and cases should be sign-extended rather than zero-extended
+; because the sign-extension can be optimized away.
+
+define i32 @widen_switch_i16_sext(i2 signext %a)  {
+entry:
+  switch i2 %a, label %sw.default [
+    i2 1, label %sw.bb0
+    i2 -1, label %sw.bb1
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+  ret i32 %retval
+
+; ARM64-LABEL: @widen_switch_i16_sext(
+; ARM64:       %0 = sext i2 %a to i32
+; ARM64-NEXT:  switch i32 %0, label %sw.default [
+; ARM64-NEXT:    i32 1, label %sw.bb0
+; ARM64-NEXT:    i32 -1, label %sw.bb1
+}
+
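
A note on the case-value arithmetic above: widening simply reinterprets the narrow
constant in the wider type, so i16 -1 becomes 65535 and i17 -1 becomes 2^17 - 1 =
131071, while the signext i2 argument keeps -1 because its condition is
sign-extended instead. As a minimal sketch (not part of this commit, hypothetical
function name), an analogous i8 switch fed through the same RUN line would be
expected to widen with i8 -1 becoming i32 255:

    define i32 @widen_switch_i8_sketch(i32 %a) {
    entry:
      %trunc = trunc i32 %a to i8
      switch i8 %trunc, label %sw.default [
        i8 1, label %sw.bb0
        i8 -1, label %sw.bb1
      ]

    sw.bb0:
      br label %return

    sw.bb1:
      br label %return

    sw.default:
      br label %return

    return:
      %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
      ret i32 %retval
    }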

Added: llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if 'AMDGPU' not in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown < %s | FileCheck -check-prefix=ASC -check-prefix=COMMON %s
+
+; COMMON-LABEL: @test_sink_ptrtoint_asc(
+; ASC: addrspacecast
+; ASC-NOT: ptrtoint
+; ASC-NOT: inttoptr
+
+define amdgpu_kernel void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 {
+bb:
+  %tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16
+  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tmp3 = sext i32 %tmp2 to i64
+  %tmp4 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp3
+  %tmp5 = load float, float addrspace(1)* %tmp4, align 4
+  %tmp6 = addrspacecast float addrspace(3)* %tmp to float addrspace(4)*
+  %tmp7 = fcmp olt float %tmp5, 8.388608e+06
+  br i1 %tmp7, label %bb8, label %bb14
+
+bb8:                                              ; preds = %bb
+  %tmp9 = tail call float @llvm.fma.f32(float %tmp5, float 0x3FE45F3060000000, float 5.000000e-01) #1
+  %tmp10 = fmul float %tmp9, 0x3E74442D00000000
+  %tmp11 = fsub float -0.000000e+00, %tmp10
+  %tmp12 = tail call float @llvm.fma.f32(float %tmp9, float 0x3E74442D00000000, float %tmp11) #1
+  store float %tmp12, float addrspace(4)* %tmp6, align 4
+  %tmp13 = fsub float -0.000000e+00, %tmp12
+  br label %bb15
+
+bb14:                                             ; preds = %bb
+  store float 2.000000e+00, float addrspace(4)* %tmp6, align 4
+  br label %bb15
+
+bb15:                                             ; preds = %bb14, %bb8
+  %tmp16 = phi float [ 0.000000e+00, %bb14 ], [ %tmp13, %bb8 ]
+  %tmp17 = fsub float -0.000000e+00, %tmp16
+  %tmp18 = tail call float @llvm.fma.f32(float 1.000000e+00, float 0x3FF0AAAAA0000000, float %tmp17) #1
+  %tmp19 = fsub float 2.187500e-01, %tmp18
+  %tmp20 = fsub float 7.187500e-01, %tmp19
+  %tmp21 = fcmp ogt float %tmp5, 1.600000e+01
+  %tmp22 = select i1 %tmp21, float 0x7FF8000000000000, float %tmp20
+  %tmp23 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp3
+  store float %tmp22, float addrspace(1)* %tmp23, align 4
+  ret void
+}
+
+declare float @llvm.fma.f32(float, float, float) #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }

Added: llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,121 @@
+; RUN: opt -S -codegenprepare -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+
+; CHECK-LABEL: @no_sink_local_to_flat(
+; CHECK: addrspacecast
+; CHECK: br
+; CHECK-NOT: addrspacecast
+define i64 @no_sink_local_to_flat(i1 %pred, i64 addrspace(3)* %ptr) {
+  %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64 addrspace(3)* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @no_sink_private_to_flat(
+; CHECK: addrspacecast
+; CHECK: br
+; CHECK-NOT: addrspacecast
+define i64 @no_sink_private_to_flat(i1 %pred, i64 addrspace(5)* %ptr) {
+  %ptr_cast = addrspacecast i64 addrspace(5)* %ptr to i64*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64 addrspace(5)* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64* %ptr_cast
+  ret i64 %v2
+}
+
+
+; CHECK-LABEL: @sink_global_to_flat(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_global_to_flat(i1 %pred, i64 addrspace(1)* %ptr) {
+  %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64 addrspace(1)* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @sink_flat_to_global(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_flat_to_global(i1 %pred, i64* %ptr) {
+  %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(1)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(1)* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @sink_flat_to_constant(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_flat_to_constant(i1 %pred, i64* %ptr) {
+  %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(4)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(4)* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @sink_flat_to_local(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_flat_to_local(i1 %pred, i64* %ptr) {
+  %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(3)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(3)* %ptr_cast
+  ret i64 %v2
+}
+
+; CHECK-LABEL: @sink_flat_to_private(
+; CHECK-NOT: addrspacecast
+; CHECK: br
+; CHECK: addrspacecast
+define i64 @sink_flat_to_private(i1 %pred, i64* %ptr) {
+  %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(5)*
+  br i1 %pred, label %l1, label %l2
+
+l1:
+  %v1 = load i64, i64* %ptr
+  ret i64 %v1
+
+l2:
+  %v2 = load i64, i64 addrspace(5)* %ptr_cast
+  ret i64 %v2
+}
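
For context on the CHECK patterns in this file (a sketch under the usual sinking
behaviour, with an illustrative function name; value names in the real output may
differ): when the cast is free on the target, CodeGenPrepare rematerializes the
addrspacecast next to its use rather than leaving it above the branch, so the
global-to-flat case ends up roughly as:

    define i64 @sink_global_to_flat_sketch(i1 %pred, i64 addrspace(1)* %ptr) {
    entry:
      br i1 %pred, label %l1, label %l2

    l1:
      %v1 = load i64, i64 addrspace(1)* %ptr
      ret i64 %v1

    l2:
      %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64*
      %v2 = load i64, i64* %ptr_cast
      ret i64 %v2
    }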

Added: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt -S -loop-unroll -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+; CHECK-LABEL: @f
+define i32 @f(i32 %a) #0 {
+; CHECK: call i32 @llvm.bitreverse.i32
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %or
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %b.07 = phi i32 [ 0, %entry ], [ %or, %for.body ]
+  %shr = lshr i32 %a, %i.08
+  %and = and i32 %shr, 1
+  %sub = sub nuw nsw i32 31, %i.08
+  %shl = shl i32 %and, %sub
+  %or = or i32 %shl, %b.07
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !3
+}
+
+attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 3.8.0"}
+!3 = distinct !{!3, !4}
+!4 = !{!"llvm.loop.unroll.full"}
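
For readers unfamiliar with the recognizer: the llvm.loop.unroll.full metadata lets
-loop-unroll fully unroll the 32-iteration loop first, after which the resulting
chain of lshr/and/shl/or operations describes a complete bit reversal that
CodeGenPrepare replaces with the intrinsic the CHECK line looks for. The end result
is roughly the following (a hand-written sketch, not the pass's literal output):

    declare i32 @llvm.bitreverse.i32(i32)

    define i32 @f_sketch(i32 %a) {
    entry:
      %rev = call i32 @llvm.bitreverse.i32(i32 %a)
      ret i32 %rev
    }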

Added: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/large-offset-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,157 @@
+; RUN: llc -mtriple=armv6m-linux-gnueabi -verify-machineinstrs -o - %s -disable-constant-hoisting | FileCheck %s
+
+%struct_type = type { [10000 x i32], i32, i32 }
+
+define void @test1(%struct_type** %s, i32 %n) {
+; CHECK-LABEL: test1
+entry:
+  %struct = load %struct_type*, %struct_type** %s
+  br label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK:      str      r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str      r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .long   40000
+; CHECK-NOT: LCPI0
+}
+
+define void @test2(%struct_type* %struct, i32 %n) {
+; CHECK-LABEL: test2
+entry:
+  %cmp = icmp eq %struct_type* %struct, null
+  br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp1 = icmp slt i32 %phi, %n
+  br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK:      str      r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str      r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+; CHECK: .LCPI1_0:
+; CHECK-NEXT: .long   40000
+; CHECK-NOT: LCPI1
+}
+
+define void @test3(%struct_type* %s1, %struct_type* %s2, i1 %cond, i32 %n) {
+; CHECK-LABEL: test3
+entry:
+  br i1 %cond, label %if_true, label %if_end
+
+if_true:
+  br label %if_end
+
+if_end:
+  %struct = phi %struct_type* [ %s1, %entry ], [ %s2, %if_true ]
+  %cmp = icmp eq %struct_type* %struct, null
+  br i1 %cmp, label %while_end, label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %if_end ], [ %i, %while_body ]
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp1 = icmp slt i32 %phi, %n
+  br i1 %cmp1, label %while_body, label %while_end
+
+while_body:
+; CHECK:      str      r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str      r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+; CHECK: .LCPI2_0:
+; CHECK-NEXT: .long   40000
+; CHECK-NOT: LCPI2
+}
+
+declare %struct_type* @foo()
+
+define void @test4(i32 %n) personality i32 (...)* @__FrameHandler {
+; CHECK-LABEL: test4
+entry:
+  %struct = invoke %struct_type* @foo() to label %while_cond unwind label %cleanup
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+  %gep0 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 1
+  %gep1 = getelementptr %struct_type, %struct_type* %struct, i64 0, i32 2
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK:      str      r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str      r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+
+cleanup:
+  landingpad { i8*, i32 } cleanup
+  unreachable
+; CHECK: .LCPI3_0:
+; CHECK-NEXT: .long   40000
+; CHECK-NOT: LCPI3
+}
+
+declare i32 @__FrameHandler(...)
+
+define void @test5([65536 x i32]** %s, i32 %n) {
+; CHECK-LABEL: test5
+entry:
+  %struct = load [65536 x i32]*, [65536 x i32]** %s
+  br label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20000
+  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %struct, i64 0, i32 20001
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+; CHECK:      str      r{{[0-9]+}}, [r{{[0-9]+}}]
+; CHECK-NEXT: str      r{{[0-9]+}}, [r{{[0-9]+}}, #4]
+  %i = add i32 %phi, 1
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  br label %while_cond
+
+while_end:
+  ret void
+; CHECK: .LCPI4_0:
+; CHECK-NEXT: .long   80000
+; CHECK-NOT: LCPI4
+}
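
A note on the constants being checked: the [10000 x i32] array occupies 40000
bytes, so field 1 of %struct_type sits at byte offset 40000 (and field 2 at 40004),
while elements 20000 and 20001 of [65536 x i32] sit at byte offsets 80000 and 80004.
The point of the CHECK lines is that each function materializes the large constant
once and reaches the neighbouring field with a small immediate. At the IR level that
corresponds to sharing a split base, roughly as in this sketch (hypothetical
function, not the compiler's exact output):

    %struct_type = type { [10000 x i32], i32, i32 }

    define void @shared_base_sketch(%struct_type* %struct, i32 %i, i32 %phi) {
    entry:
      %base = bitcast %struct_type* %struct to i8*
      %split = getelementptr i8, i8* %base, i32 40000
      %f1 = bitcast i8* %split to i32*
      store i32 %i, i32* %f1
      %split4 = getelementptr i8, i8* %split, i32 4
      %f2 = bitcast i8* %split4 to i32*
      store i32 %phi, i32* %f2
      ret void
    }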

Added: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if 'ARM' not in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/memory-intrinsics.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,43 @@
+; RUN: opt -codegenprepare -mtriple=arm7-unknown-unknown -S < %s | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) nounwind
+
+define void @test_memcpy(i8* align 4 %dst, i8* align 8 %src, i32 %N) {
+; CHECK-LABEL: @test_memcpy
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false)
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %N, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %dst, i8* align 2 %src, i32 %N, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false)
+  ret void
+}
+
+define void @test_memmove(i8* align 4 %dst, i8* align 8 %src, i32 %N) {
+; CHECK-LABEL: @test_memmove
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false)
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 8 %src, i32 %N, i1 false)
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false)
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %N, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* align 2 %dst, i8* align 2 %src, i32 %N, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dst, i8* align 16 %src, i32 %N, i1 false)
+  ret void
+}
+
+define void @test_memset(i8* align 4 %dst, i8 %val, i32 %N) {
+; CHECK-LABEL: @test_memset
+; CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %dst, i8 %val, i32 %N, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %dst, i8 %val, i32 %N, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i32(i8* align 8 %dst, i8 %val, i32 %N, i1 false)
+entry:
+  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, i32 %N, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 2 %dst, i8 %val, i32 %N, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* align 8 %dst, i8 %val, i32 %N, i1 false)
+  ret void
+}
+
+

Added: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/overflow-intrinsics.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv8m.main-arm-none-eabi"
+
+; CHECK-LABEL: uadd_overflow_too_far_cmp_dom
+; CHECK-NOT: with.overflow.i32
+define i32 @uadd_overflow_too_far_cmp_dom(i32 %arg0) {
+entry:
+  %cmp = icmp ne i32 %arg0, 0
+  br i1 %cmp, label %if.else, label %if.then
+
+if.then:
+  call void @foo()
+  br label %exit
+
+if.else:
+  call void @bar()
+  br label %if.end
+
+if.end:
+  %dec = add nsw i32 %arg0, -1
+  br label %exit
+
+exit:
+  %res = phi i32 [ %arg0, %if.then ], [ %dec, %if.end ]
+  ret i32 %res
+}
+
+; CHECK-LABEL: uadd_overflow_too_far_math_dom
+; CHECK-NOT: with.overflow.i32
+define i32 @uadd_overflow_too_far_math_dom(i32 %arg0, i32 %arg1) {
+entry:
+  %dec = add nsw i32 %arg0, -1
+  %cmp = icmp ugt i32 %arg0, 1
+  br i1 %cmp, label %if.else, label %if.then
+
+if.then:
+  call void @foo()
+  br label %if.end
+
+if.else:
+  call void @bar()
+  br label %if.end
+
+if.end:
+  %cmp.i.i = icmp ne i32 %arg0, 0
+  %tobool = zext i1 %cmp.i.i to i32
+  br label %exit
+
+exit:
+  ret i32 %tobool
+}
+
+declare void @foo()
+declare void @bar()
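
For reference, the form that the CHECK-NOT lines rule out is the overflow-intrinsic
shape CodeGenPrepare produces when the compare and the decrement are close enough to
be combined. A simplified sketch of that shape (hypothetical function, without the
calls that keep the real tests apart):

    declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)

    define i32 @uadd_overflow_formed_sketch(i32 %arg0) {
    entry:
      %math = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %arg0, i32 -1)
      %dec = extractvalue { i32, i1 } %math, 0
      ; the overflow bit is true exactly when %arg0 != 0, so it can stand in
      ; for the original icmp
      %ov = extractvalue { i32, i1 } %math, 1
      %res = select i1 %ov, i32 %dec, i32 %arg0
      ret i32 %res
    }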

Added: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,420 @@
+; RUN: opt -S -codegenprepare -mtriple=thumbv7m -disable-complex-addr-modes=false -addr-sink-new-select=true -addr-sink-new-phis=true < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+ at gv1 = common global i32 0, align 4
+ at gv2 = common global i32 0, align 4
+
+; Phi selects between ptr and gep with ptr as base and constant offset
+define void @test_phi_onegep_offset(i32* %ptr, i32 %value) {
+; CHECK-LABEL: @test_phi_onegep_offset
+; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ]
+; CHECK: phi i32 [ 4, %if.then ], [ 0, %entry ]
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %gep = getelementptr inbounds i32, i32* %ptr, i32 1
+  br label %if.end
+
+if.end:
+  %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ]
+  store i32 %value, i32* %phi, align 4
+  ret void
+}
+
+; Phi selects between two geps with same base, different constant offsets
+define void @test_phi_twogep_offset(i32* %ptr, i32 %value) {
+; CHECK-LABEL: @test_phi_twogep_offset
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+; CHECK: phi i32 [ 8, %if.else ], [ 4, %if.then ]
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  br label %if.end
+
+if.else:
+  %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  br label %if.end
+
+if.end:
+  %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+  store i32 %value, i32* %phi, align 4
+  ret void
+}
+
+; Phi selects between ptr and gep with ptr as base and nonconstant offset
+define void @test_phi_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) {
+; CHECK-LABEL: @test_phi_onegep_nonconst_offset
+; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ]
+; CHECK: phi i32 [ %off, %if.then ], [ 0, %entry ]
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %gep = getelementptr inbounds i32, i32* %ptr, i32 %off
+  br label %if.end
+
+if.end:
+  %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ]
+  store i32 %value, i32* %phi, align 4
+  ret void
+}
+
+; Phi selects between two geps with same base, different nonconstant offsets
+define void @test_phi_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) {
+; CHECK-LABEL: @test_phi_twogep_nonconst_offset
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+; CHECK: phi i32 [ %off2, %if.else ], [ %off1, %if.then ]
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1
+  br label %if.end
+
+if.else:
+  %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2
+  br label %if.end
+
+if.end:
+  %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+  store i32 %value, i32* %phi, align 4
+  ret void
+}
+
+; Phi selects between two geps with different base, same constant offset
+define void @test_phi_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) {
+; CHECK-LABEL: @test_phi_twogep_base
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+; CHECK: phi i32* [ %ptr2, %if.else ], [ %ptr1, %if.then ]
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1
+  br label %if.end
+
+if.else:
+  %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1
+  br label %if.end
+
+if.end:
+  %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+  store i32 %value, i32* %phi, align 4
+  ret void
+}
+
+; Phi selects between two geps with different base global variables, same constant offset
+define void @test_phi_twogep_base_gv(i32 %value) {
+; CHECK-LABEL: @test_phi_twogep_base_gv
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+; CHECK: phi i32* [ @gv2, %if.else ], [ @gv1, %if.then ]
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1
+  br label %if.end
+
+if.else:
+  %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1
+  br label %if.end
+
+if.end:
+  %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ]
+  store i32 %value, i32* %phi, align 4
+  ret void
+}
+
+; Phi selects between ptr and gep with ptr as base and constant offset
+define void @test_select_onegep_offset(i32* %ptr, i32 %value) {
+; CHECK-LABEL: @test_select_onegep_offset
+; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep
+; CHECK: select i1 %cmp, i32 0, i32 4
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %gep = getelementptr inbounds i32, i32* %ptr, i32 1
+  %select = select i1 %cmp, i32* %ptr, i32* %gep
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; Select between two geps with same base, different constant offsets
+define void @test_select_twogep_offset(i32* %ptr, i32 %value) {
+; CHECK-LABEL: @test_select_twogep_offset
+; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: select i1 %cmp, i32 4, i32 8
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; Select between ptr and gep with ptr as base and nonconstant offset
+define void @test_select_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) {
+; CHECK-LABEL: @test_select_onegep_nonconst_offset
+; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep
+; CHECK: select i1 %cmp, i32 0, i32 %off
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %gep = getelementptr inbounds i32, i32* %ptr, i32 %off
+  %select = select i1 %cmp, i32* %ptr, i32* %gep
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; Select between two geps with same base, different nonconstant offsets
+define void @test_select_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) {
+; CHECK-LABEL: @test_select_twogep_nonconst_offset
+; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: select i1 %cmp, i32 %off1, i32 %off2
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1
+  %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; Select between two geps with different base, same constant offset
+define void @test_select_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) {
+; CHECK-LABEL: @test_select_twogep_base
+; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1
+  %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; Select between two geps with different base global variables, same constant offset
+define void @test_select_twogep_base_gv(i32 %value) {
+; CHECK-LABEL: @test_select_twogep_base_gv
+; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1
+  %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; If the phi is in a different block from where the gep will be, the new phi
+; goes where the original phi was, not where the gep is.
+; CHECK-LABEL: @test_phi_different_block
+; CHECK-LABEL: if1.end
+; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if1.then ]
+; CHECK: phi i32 [ 4, %if1.then ], [ 0, %entry ]
+define void @test_phi_different_block(i32* %ptr, i32 %value1, i32 %value2) {
+entry:
+  %cmp1 = icmp sgt i32 %value1, 0
+  br i1 %cmp1, label %if1.then, label %if1.end
+
+if1.then:
+  %gep = getelementptr inbounds i32, i32* %ptr, i32 1
+  br label %if1.end
+
+if1.end:
+  %phi = phi i32* [ %ptr, %entry ], [ %gep, %if1.then ]
+  %cmp2 = icmp sgt i32 %value2, 0
+  br i1 %cmp2, label %if2.then, label %if2.end
+
+if2.then:
+  store i32 %value1, i32* %ptr, align 4
+  br label %if2.end
+
+if2.end:
+  store i32 %value2, i32* %phi, align 4
+  ret void
+}
+
+; A phi with three incoming values should be optimised
+; CHECK-LABEL: @test_phi_threegep
+; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ]
+; CHECK: phi i32 [ 12, %if.else.else ], [ 8, %if.else.then ], [ 4, %if.then ]
+define void @test_phi_threegep(i32* %ptr, i32 %value1, i32 %value2) {
+entry:
+  %cmp1 = icmp sgt i32 %value1, 0
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  br label %if.end
+
+if.else:
+  %cmp2 = icmp sgt i32 %value2, 0
+  br i1 %cmp2, label %if.else.then, label %if.else.else
+
+if.else.then:
+  %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  br label %if.end
+
+if.else.else:
+  %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  br label %if.end
+
+if.end:
+  %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ]
+  store i32 %value1, i32* %phi, align 4
+  ret void
+}
+
+; A phi with two incoming values but three geps due to nesting should be
+; optimised
+; CHECK-LABEL: @test_phi_threegep_nested
+; CHECK: %[[PHI:[a-z0-9_]+]] = phi i32 [ 12, %if.else.else ], [ 8, %if.else.then ]
+; CHECK: phi i32 [ %[[PHI]], %if.else.end ], [ 4, %if.then ]
+define void @test_phi_threegep_nested(i32* %ptr, i32 %value1, i32 %value2) {
+entry:
+  %cmp1 = icmp sgt i32 %value1, 0
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  br label %if.end
+
+if.else:
+  %cmp2 = icmp sgt i32 %value2, 0
+  br i1 %cmp2, label %if.else.then, label %if.else.else
+
+if.else.then:
+  %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  br label %if.else.end
+
+if.else.else:
+  %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  br label %if.else.end
+
+if.else.end:
+  %gep4 = phi i32* [ %gep2, %if.else.then ], [ %gep3, %if.else.else ]
+  store i32 %value2, i32* %ptr, align 4
+  br label %if.end
+
+if.end:
+  %phi = phi i32* [ %gep1, %if.then ], [ %gep4, %if.else.end ]
+  store i32 %value1, i32* %phi, align 4
+  ret void
+}
+
+; A nested select is expected to be optimised
+; CHECK-LABEL: @test_nested_select
+; CHECK: %[[SELECT:[a-z0-9_]+]] = select i1 %cmp2, i32 4, i32 8
+; CHECK: select i1 %cmp1, i32 4, i32 %[[SELECT]]
+define void @test_nested_select(i32* %ptr, i32 %value1, i32 %value2) {
+entry:
+  %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %cmp1 = icmp sgt i32 %value1, 0
+  %cmp2 = icmp sgt i32 %value2, 0
+  %select1 = select i1 %cmp2, i32* %gep1, i32* %gep2
+  %select2 = select i1 %cmp1, i32* %gep1, i32* %select1
+  store i32 %value1, i32* %select2, align 4
+  ret void
+}
+
+; Scaling the offset by a different amount is expected not to be optimised
+; CHECK-LABEL: @test_select_different_scale
+; CHECK: select i1 %cmp, i32* %gep1, i32* %castgep
+define void @test_select_different_scale(i32* %ptr, i32 %value, i32 %off) {
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %castptr = bitcast i32* %ptr to i16*
+  %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off
+  %gep2 = getelementptr inbounds i16, i16* %castptr, i32 %off
+  %castgep = bitcast i16* %gep2 to i32*
+  %select = select i1 %cmp, i32* %gep1, i32* %castgep
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; A select between two values is already the best we can do
+; CHECK-LABEL: @test_select_trivial
+; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2
+define void @test_select_trivial(i32* %ptr1, i32* %ptr2, i32 %value) {
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %select = select i1 %cmp, i32* %ptr1, i32* %ptr2
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; A select between two global variables is already the best we can do
+; CHECK-LABEL: @test_select_trivial_gv
+; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2
+define void @test_select_trivial_gv(i32 %value) {
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %select = select i1 %cmp, i32* @gv1, i32* @gv2
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; Same for a select between a value and global variable
+; CHECK-LABEL: @test_select_trivial_ptr_gv
+; CHECK: select i1 %cmp, i32* %ptr, i32* @gv2
+define void @test_select_trivial_ptr_gv(i32* %ptr, i32 %value) {
+entry:
+  %cmp = icmp sgt i32 %value, 0
+  %select = select i1 %cmp, i32* %ptr, i32* @gv2
+  store i32 %value, i32* %select, align 4
+  ret void
+}
+
+; Same for a select between a global variable and null, though the test needs to
+; be a little more complicated to avoid dereferencing a potential null pointer
+; CHECK-LABEL: @test_select_trivial_gv_null
+; CHECK: select i1 %cmp.i, i32* @gv1, i32* null
+define void @test_select_trivial_gv_null(){
+entry:
+  %gv1_val = load i32, i32* @gv1, align 4
+  %cmp.i = icmp eq i32 %gv1_val, 0
+  %spec.select.i = select i1 %cmp.i, i32* @gv1, i32* null
+  br i1 %cmp.i, label %if.then, label %if.end
+
+if.then:
+  %val = load i32, i32* %spec.select.i, align 4
+  %inc = add nsw i32 %val, 1
+  store i32 %inc, i32* %spec.select.i, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; Same for a select between a value and null
+; CHECK-LABEL: @test_select_trivial_ptr_null
+; CHECK: select i1 %cmp.i, i32* %ptr, i32* null
+define void @test_select_trivial_ptr_null(i32* %ptr){
+entry:
+  %gv1_val = load i32, i32* %ptr, align 4
+  %cmp.i = icmp eq i32 %gv1_val, 0
+  %spec.select.i = select i1 %cmp.i, i32* %ptr, i32* null
+  br i1 %cmp.i, label %if.then, label %if.end
+
+if.then:
+  %val = load i32, i32* %spec.select.i, align 4
+  %inc = add nsw i32 %val, 1
+  store i32 %inc, i32* %spec.select.i, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
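
To make the CHECK patterns above more concrete: when all incoming pointers share a
base, the pass replaces the pointer phi or select with a phi or select of integer
offsets and folds the addressing into a single sunken gep at the memory access. For
the first test this yields code of roughly the following shape (a sketch with
illustrative value names, not the exact output):

    define void @test_phi_onegep_offset_sketch(i32* %ptr, i32 %value) {
    entry:
      %cmp = icmp sgt i32 %value, 0
      br i1 %cmp, label %if.then, label %if.end

    if.then:
      br label %if.end

    if.end:
      %offset = phi i32 [ 4, %if.then ], [ 0, %entry ]
      %base = bitcast i32* %ptr to i8*
      %sunkaddr = getelementptr i8, i8* %base, i32 %offset
      %addr = bitcast i8* %sunkaddr to i32*
      store i32 %value, i32* %addr, align 4
      ret void
    }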

Added: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=armv7-apple-darwin < %s -codegenprepare -S | FileCheck -check-prefix=NEON %s
+; RUN: opt -mtriple=armv6-unknown-linux < %s -codegenprepare -S | FileCheck -check-prefix=NONEON %s
+
+define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; NEON-LABEL: @sink_zext(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
+; NEON-NEXT:    [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; NEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NEON:       if.else:
+; NEON-NEXT:    [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+; NEON-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
+; NEON-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
+; NEON-NEXT:    ret <8 x i16> [[RES_2]]
+;
+; NONEON-LABEL: @sink_zext(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
+; NONEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
+; NONEON-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_2]]
+;
+entry:
+  %za = zext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %zb.1 = zext <8 x i8> %b to <8 x i16>
+  %res.1 = add <8 x i16> %za, %zb.1
+  ret <8 x i16> %res.1
+
+if.else:
+  %zb.2 = zext <8 x i8> %b to <8 x i16>
+  %res.2 = sub <8 x i16> %za, %zb.2
+  ret <8 x i16> %res.2
+}
+
+define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
+; NEON-LABEL: @sink_sext(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; NEON-NEXT:    [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; NEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NEON:       if.else:
+; NEON-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; NEON-NEXT:    [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
+; NEON-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
+; NEON-NEXT:    ret <8 x i16> [[RES_2]]
+;
+; NONEON-LABEL: @sink_sext(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; NONEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; NONEON-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_2]]
+;
+entry:
+  %za = sext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %zb.1 = sext <8 x i8> %b to <8 x i16>
+  %res.1 = add <8 x i16> %za, %zb.1
+  ret <8 x i16> %res.1
+
+if.else:
+  %zb.2 = sext <8 x i8> %b to <8 x i16>
+  %res.2 = sub <8 x i16> %za, %zb.2
+  ret <8 x i16> %res.2
+}
+
+define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i16> %b, i1 %c) {
+;
+; NEON-LABEL: @do_not_sink_nonfree_zext(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NEON:       if.else:
+; NEON-NEXT:    ret <8 x i16> [[B]]
+;
+; NONEON-LABEL: @do_not_sink_nonfree_zext(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    ret <8 x i16> [[B]]
+;
+entry:
+  %za = zext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %res.1 = add <8 x i16> %za, %b
+  ret <8 x i16> %res.1
+
+if.else:
+  ret <8 x i16> %b
+}
+
+define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i16> %b, i1 %c) {
+; CHECK-LABEL: @do_not_sink_nonfree_sext(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
+; CHECK-NEXT:    ret <8 x i16> [[RES_1]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[ZB_2]]
+;
+; NEON-LABEL: @do_not_sink_nonfree_sext(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NEON:       if.else:
+; NEON-NEXT:    ret <8 x i16> [[B]]
+;
+; NONEON-LABEL: @do_not_sink_nonfree_sext(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
+; NONEON-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]]
+; NONEON-NEXT:    ret <8 x i16> [[RES_1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    ret <8 x i16> [[B]]
+;
+entry:
+  %za = sext <8 x i8> %a to <8 x i16>
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %res.1 = add <8 x i16> %za, %b
+  ret <8 x i16> %res.1
+
+if.else:
+  ret <8 x i16> %b
+}
+
+declare void @user1(<8 x i16>)
+
+; Exts can be sunk.
+define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b) {
+; NEON-LABEL: @sink_shufflevector_ext_subadd_multiuse(
+; NEON-NEXT:  entry:
+; NEON-NEXT:    [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:    [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:    [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
+; NEON-NEXT:    call void @user1(<8 x i16> [[Z3]])
+; NEON-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NEON:       if.then:
+; NEON-NEXT:    [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NEON-NEXT:    [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
+; NEON-NEXT:    [[TMP0:%.*]] = zext <8 x i8> [[S1]] to <8 x i16>
+; NEON-NEXT:    [[RES1:%.*]] = add <8 x i16> [[TMP0]], [[Z2]]
+; NEON-NEXT:    ret <8 x i16> [[RES1]]
+; NEON:       if.else:
+; NEON-NEXT:    [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NEON-NEXT:    [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
+; NEON-NEXT:    [[TMP1:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
+; NEON-NEXT:    [[RES2:%.*]] = sub <8 x i16> [[TMP1]], [[Z4]]
+; NEON-NEXT:    ret <8 x i16> [[RES2]]
+;
+; NONEON-LABEL: @sink_shufflevector_ext_subadd_multiuse(
+; NONEON-NEXT:  entry:
+; NONEON-NEXT:    [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NONEON-NEXT:    [[Z1:%.*]] = zext <8 x i8> [[S1]] to <8 x i16>
+; NONEON-NEXT:    [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NONEON-NEXT:    [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
+; NONEON-NEXT:    call void @user1(<8 x i16> [[Z3]])
+; NONEON-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NONEON:       if.then:
+; NONEON-NEXT:    [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; NONEON-NEXT:    [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
+; NONEON-NEXT:    [[RES1:%.*]] = add <8 x i16> [[Z1]], [[Z2]]
+; NONEON-NEXT:    ret <8 x i16> [[RES1]]
+; NONEON:       if.else:
+; NONEON-NEXT:    [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; NONEON-NEXT:    [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
+; NONEON-NEXT:    [[RES2:%.*]] = sub <8 x i16> [[Z3]], [[Z4]]
+; NONEON-NEXT:    ret <8 x i16> [[RES2]]
+;
+entry:
+  %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %z1 = zext <8 x i8> %s1 to <8 x i16>
+  %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %z3 = sext <8 x i8> %s3 to <8 x i16>
+  call void @user1(<8 x i16> %z3)
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %z2 = zext <8 x i8> %s2 to <8 x i16>
+  %res1 = add <8 x i16> %z1, %z2
+  ret <8 x i16> %res1
+
+if.else:
+  %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %z4 = sext <8 x i8> %s4 to <8 x i16>
+  %res2 = sub <8 x i16> %z3, %z4
+  ret <8 x i16> %res2
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/splitgep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/ARM/splitgep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/splitgep.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/splitgep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -S -codegenprepare %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; Check that we have deterministic output
+define void @test([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %0 = bitcast [65536 x i32]* %t to i8*
+; CHECK-NEXT:    %splitgep1 = getelementptr i8, i8* %0, i32 80000
+; CHECK-NEXT:    %s = load [65536 x i32]*, [65536 x i32]** %sp
+; CHECK-NEXT:    %1 = bitcast [65536 x i32]* %s to i8*
+; CHECK-NEXT:    %splitgep = getelementptr i8, i8* %1, i32 80000
+entry:
+  %s = load [65536 x i32]*, [65536 x i32]** %sp
+  br label %while_cond
+
+while_cond:
+  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
+  %gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
+  %gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
+  %gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
+  %gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
+  %cmp = icmp slt i32 %phi, %n
+  br i1 %cmp, label %while_body, label %while_end
+
+while_body:
+  %i = add i32 %phi, 1
+  %j = add i32 %phi, 2
+  store i32 %i, i32* %gep0
+  store i32 %phi, i32* %gep1
+  store i32 %i, i32* %gep2
+  store i32 %phi, i32* %gep3
+  br label %while_cond
+
+while_end:
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,77 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target triple = "armv8m.main-none-eabi"
+
+declare i8* @f0()
+declare i8* @f1()
+
+define i8* @tail_dup() {
+; CHECK-LABEL: tail_dup
+; CHECK: tail call i8* @f0()
+; CHECK-NEXT: ret i8*
+; CHECK: tail call i8* @f1()
+; CHECK-NEXT: ret i8*
+bb0:
+  %tmp0 = tail call i8* @f0()
+  br label %return
+bb1:
+  %tmp1 = tail call i8* @f1()
+  br label %return
+return:
+  %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
+  ret i8* %retval
+}
+
+define nonnull i8* @nonnull_dup() {
+; CHECK-LABEL: nonnull_dup
+; CHECK: tail call i8* @f0()
+; CHECK-NEXT: ret i8*
+; CHECK: tail call i8* @f1()
+; CHECK-NEXT: ret i8*
+bb0:
+  %tmp0 = tail call i8* @f0()
+  br label %return
+bb1:
+  %tmp1 = tail call i8* @f1()
+  br label %return
+return:
+  %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
+  ret i8* %retval
+}
+
+define i8* @noalias_dup() {
+; CHECK-LABEL: noalias_dup
+; CHECK: tail call noalias i8* @f0()
+; CHECK-NEXT: ret i8*
+; CHECK: tail call noalias i8* @f1()
+; CHECK-NEXT: ret i8*
+bb0:
+  %tmp0 = tail call noalias i8* @f0()
+  br label %return
+bb1:
+  %tmp1 = tail call noalias i8* @f1()
+  br label %return
+return:
+  %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
+  ret i8* %retval
+}
+
+; Use inreg as a way of testing that attributes (other than nonnull and
+; noalias) disable the tailcall duplication in cgp.
+
+define inreg i8* @inreg_nodup() {
+; CHECK-LABEL: inreg_nodup
+; CHECK: tail call i8* @f0()
+; CHECK-NEXT: br label %return
+; CHECK: tail call i8* @f1()
+; CHECK-NEXT: br label %return
+bb0:
+  %tmp0 = tail call i8* @f0()
+  br label %return
+bb1:
+  %tmp1 = tail call i8* @f1()
+  br label %return
+return:
+  %retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
+  ret i8* %retval
+}
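
To spell out what duplication means here: CodeGenPrepare copies the shared ret into
each predecessor so every call is immediately followed by its own return and can be
lowered as a genuine tail call; when a blocking attribute such as inreg is present,
the calls keep branching to the common return block instead. Roughly (a sketch with
an illustrative name, not the exact output):

    declare i8* @f0()
    declare i8* @f1()

    define i8* @tail_dup_sketch() {
    bb0:
      %tmp0 = tail call i8* @f0()
      ret i8* %tmp0

    ; bb1 is unreachable, mirroring the tests above; the interesting part is
    ; that each call block now ends in its own ret.
    bb1:
      %tmp1 = tail call i8* @f1()
      ret i8* %tmp1
    }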

Added: llvm/trunk/test/Transforms/CodeGenPrepare/Mips/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/Mips/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/Mips/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/Mips/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+if 'Mips' not in config.root.targets:
+    config.unsupported = True

Added: llvm/trunk/test/Transforms/CodeGenPrepare/Mips/pr35209.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/Mips/pr35209.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/Mips/pr35209.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/Mips/pr35209.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,64 @@
+; RUN: opt -S -mtriple=mips64-mti-linux-gnu -codegenprepare < %s | FileCheck %s
+
+; Test that when an address sunk from a dominating bb is used in a select
+; that is erased along with its trivially dead operand, the sunken address
+; is not reused if the same address computation occurs after the select.
+; Previously, this caused an ICE.
+
+%struct.az = type { i32, %struct.bt* }
+%struct.bt = type { i32 }
+%struct.f = type { %struct.ax, %union.anon }
+%struct.ax = type { %struct.az* }
+%union.anon = type { %struct.bd }
+%struct.bd = type { i64 }
+%struct.bg = type { i32, i32 }
+%struct.ap = type { i32, i32 }
+
+ at ch = common global %struct.f zeroinitializer, align 8
+ at j = common global %struct.az* null, align 8
+ at ck = common global i32 0, align 4
+ at h = common global i32 0, align 4
+ at .str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+define internal void @probestart() {
+entry:
+  %0 = load %struct.az*, %struct.az** @j, align 8
+  %bw = getelementptr inbounds %struct.az, %struct.az* %0, i64 0, i32 1
+  %1 = load i32, i32* @h, align 4
+  %cond = icmp eq i32 %1, 0
+  br i1 %cond, label %sw.bb, label %cl
+
+sw.bb:                                            ; preds = %entry
+  %call = tail call inreg { i64, i64 } @ba(i32* bitcast (%struct.f* @ch to i32*))
+  br label %cl
+
+cl:                                               ; preds = %sw.bb, %entry
+  %2 = load %struct.bt*, %struct.bt** %bw, align 8
+  %tobool = icmp eq %struct.bt* %2, null
+  %3 = load i32, i32* @ck, align 4
+  %.sink5 = select i1 %tobool, i32* getelementptr (%struct.bg, %struct.bg* bitcast (%union.anon* getelementptr inbounds (%struct.f, %struct.f* @ch, i64 0, i32 1) to %struct.bg*), i64 0, i32 1), i32* getelementptr (%struct.ap, %struct.ap* bitcast (%union.anon* getelementptr inbounds (%struct.f, %struct.f* @ch, i64 0, i32 1) to %struct.ap*), i64 0, i32 1)
+  store i32 %3, i32* %.sink5, align 4
+  store i32 1, i32* bitcast (i64* getelementptr inbounds (%struct.f, %struct.f* @ch, i64 0, i32 1, i32 0, i32 0) to i32*), align 8
+  %4 = load %struct.bt*, %struct.bt** %bw, align 8
+  tail call void (i8*, ...) @a(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0), %struct.bt* %4)
+  ret void
+}
+
+; CHECK-LABEL: @probestart()
+; CHECK-LABEL: entry:
+; CHECK: %[[I0:[0-9]+]] = load %struct.az*, %struct.az** @j
+; CHECK-LABEL: cl:
+
+; CHECK-NOT: %{{[0-9]+}}  = load %struct.bt*, %struct.bt** %bw
+; CHECK-NOT: %{{[.a-z0-9]}} = select
+; CHECK-NOT: %{{[0-9]+}}  = load %struct.bt*, %struct.bt** %bw
+
+; CHECK: %[[I1:[0-9]+]] = bitcast %struct.az* %[[I0]] to i8*
+; CHECK-NEXT: %sunkaddr = getelementptr inbounds i8, i8* %[[I1]], i64 8
+; CHECK-NEXT: %[[I2:[0-9]+]] = bitcast i8* %sunkaddr to %struct.bt**
+; CHECK-NEXT: %{{[0-9]+}} = load %struct.bt*, %struct.bt** %[[I2]]
+; CHECK-NEXT: tail call void (i8*, ...) @a
+
+declare inreg { i64, i64 } @ba(i32*)
+
+declare void @a(i8*, ...)

Added: llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; When we bypass slow div with a constant numerator which fits into the bypass
+; width, we still emit the bypass code, but we don't 'or' the numerator with
+; the denominator.
+; CHECK-LABEL: @small_constant_numer
+define i64 @small_constant_numer(i64 %a) {
+  ; CHECK: [[AND:%[0-9]+]] = and i64 %a, -4294967296
+  ; CHECK: icmp eq i64 [[AND]], 0
+
+  ; CHECK: [[TRUNC:%[0-9]+]] = trunc i64 %a to i32
+  ; CHECK: udiv i32 -1, [[TRUNC]]
+  %d = sdiv i64 4294967295, %a  ; 0xffff'ffff
+  ret i64 %d
+}
+
+; When we try to bypass slow div with a constant numerator which *doesn't* fit
+; into the bypass width, we leave it as a plain 64-bit div with no bypass.
+; CHECK-LABEL: @large_constant_numer
+define i64 @large_constant_numer(i64 %a) {
+  ; CHECK-NOT: udiv i32
+  %d = sdiv i64 4294967296, %a  ; 0x1'0000'0000
+  ret i64 %d
+}
+
+; For good measure, try a value larger than 2^32.
+; CHECK-LABEL: @larger_constant_numer
+define i64 @larger_constant_numer(i64 %a) {
+  ; CHECK-NOT: udiv i32
+  %d = sdiv i64 5000000000, %a
+  ret i64 %d
+}
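
For orientation, the bypass described by the first block of CHECK lines has the
usual two-path shape: test whether the runtime operand has any of its upper 32 bits
set and, if not, perform the division in 32 bits. A minimal hand-written sketch of
that shape for a plain unsigned division (illustrative only; the pass's real output
also handles the signed and constant-operand cases, as the special-cases test below
shows):

    define i64 @bypass_shape_sketch(i64 %a, i64 %b) {
    entry:
      %hi.a = and i64 %a, -4294967296
      %hi.b = and i64 %b, -4294967296
      %hi = or i64 %hi.a, %hi.b
      %fits = icmp eq i64 %hi, 0
      br i1 %fits, label %small, label %large

    small:
      %a32 = trunc i64 %a to i32
      %b32 = trunc i64 %b to i32
      %q32 = udiv i32 %a32, %b32
      %q.small = zext i32 %q32 to i64
      br label %done

    large:
      %q.large = udiv i64 %a, %b
      br label %done

    done:
      %q = phi i64 [ %q.small, %small ], [ %q.large, %large ]
      ret i64 %q
    }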

Added: llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; Check that the smaller-width division that the BypassSlowDivision pass
+; creates is not marked as "exact" (that is, it doesn't claim that the
+; numerator is a multiple of the denominator).
+;
+; CHECK-LABEL: @test
+define void @test(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK: udiv i32
+  %d = sdiv i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; No bypassing should be done in apparently unsuitable cases.
+define void @Test_no_bypassing(i32 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_no_bypassing(
+; CHECK-NEXT:    [[A_1:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[A_2:%.*]] = sub i64 -1, [[A_1]]
+; CHECK-NEXT:    [[RES:%.*]] = srem i64 [[A_2]], [[B:%.*]]
+; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %a.1 = zext i32 %a to i64
+  ; %a.2 is always negative so the division cannot be bypassed.
+  %a.2 = sub i64 -1, %a.1
+  %res = srem i64 %a.2, %b
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+; No OR instruction is needed if one of the operands (divisor) is known
+; to fit into 32 bits.
+define void @Test_check_one_operand(i64 %a, i32 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_check_one_operand(
+; CHECK-NEXT:    [[B_1:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[A:%.*]], -4294967296
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP8:%.*]]
+; CHECK:         [[TMP4:%.*]] = trunc i64 [[B_1]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    br label [[TMP10:%.*]]
+; CHECK:         [[TMP9:%.*]] = sdiv i64 [[A]], [[B_1]]
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:         [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ]
+; CHECK-NEXT:    store i64 [[TMP11]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %b.1 = zext i32 %b to i64
+  %res = sdiv i64 %a, %b.1
+  store i64 %res, i64* %retptr
+  ret void
+}
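
For contrast with the single-operand case above, a hand-written sketch (not
part of the test) of the combined check emitted when neither operand is known
to be short: one OR feeds a single high-bits test covering both values, the
shape that appears later in Test_bypass_phi_mul_const and
Test_bypass_mul_short_const.

  define i1 @both_operands_short_sketch(i64 %a, i64 %b) {
    ; a single OR lets one "high 32 bits all zero?" test cover both operands
    %or = or i64 %a, %b
    %hi = and i64 %or, -4294967296
    %short = icmp eq i64 %hi, 0
    ret i1 %short
  }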
+
+; If both operands are known to fit into 32 bits, then replace the division
+; in-place without CFG modification.
+define void @Test_check_none(i64 %a, i32 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_check_none(
+; CHECK-NEXT:    [[A_1:%.*]] = and i64 [[A:%.*]], 4294967295
+; CHECK-NEXT:    [[B_1:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A_1]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[B_1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = udiv i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT:    store i64 [[TMP4]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %a.1 = and i64 %a, 4294967295
+  %b.1 = zext i32 %b to i64
+  %res = udiv i64 %a.1, %b.1
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+; In the case of an unsigned long division with a short dividend, the long
+; division is not needed at all: when the dividend is smaller than the
+; divisor, the quotient is 0 and the remainder is the dividend itself.
+define void @Test_special_case(i32 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_special_case(
+; CHECK-NEXT:    [[A_1:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i64 [[A_1]], [[B:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP9:%.*]]
+; CHECK:         [[TMP3:%.*]] = trunc i64 [[B]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[A_1]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = udiv i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = urem i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:         [[TMP10:%.*]] = phi i64 [ [[TMP7]], [[TMP2]] ], [ 0, [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i64 [ [[TMP8]], [[TMP2]] ], [ [[A_1]], [[TMP0]] ]
+; CHECK-NEXT:    [[RES:%.*]] = add i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %a.1 = zext i32 %a to i64
+  %div = udiv i64 %a.1, %b
+  %rem = urem i64 %a.1, %b
+  %res = add i64 %div, %rem
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+
+; Do not bypass a division if one of the operands looks like a hash value
+; (e.g. the result of an xor or of a multiply by a constant wider than
+; 32 bits): such a value is unlikely to fit into 32 bits, so the runtime
+; check would rarely pay off.
+define void @Test_dont_bypass_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_dont_bypass_xor(
+; CHECK-NEXT:    [[C:%.*]] = xor i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = udiv i64 [[C]], [[L:%.*]]
+; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %c = xor i64 %a, %b
+  %res = udiv i64 %c, %l
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+define void @Test_dont_bypass_phi_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_dont_bypass_phi_xor(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[B:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[MERGE:%.*]], label [[XORPATH:%.*]]
+; CHECK:       xorpath:
+; CHECK-NEXT:    [[C:%.*]] = xor i64 [[A:%.*]], [[B]]
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[E:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[C]], [[XORPATH]] ]
+; CHECK-NEXT:    [[RES:%.*]] = sdiv i64 [[E]], [[L:%.*]]
+; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp = icmp eq i64 %b, 0
+  br i1 %cmp, label %merge, label %xorpath
+
+xorpath:
+  %c = xor i64 %a, %b
+  br label %merge
+
+merge:
+  %e = phi i64 [ undef, %entry ], [ %c, %xorpath ]
+  %res = sdiv i64 %e, %l
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+define void @Test_dont_bypass_mul_long_const(i64 %a, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_dont_bypass_mul_long_const(
+; CHECK-NEXT:    [[C:%.*]] = mul i64 [[A:%.*]], 5229553307
+; CHECK-NEXT:    [[RES:%.*]] = urem i64 [[C]], [[L:%.*]]
+; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %c = mul i64 %a, 5229553307 ; the constant doesn't fit 32 bits
+  %res = urem i64 %c, %l
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+define void @Test_bypass_phi_mul_const(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_bypass_phi_mul_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_MUL:%.*]] = mul nsw i64 [[A:%.*]], 34806414968801
+; CHECK-NEXT:    [[P:%.*]] = icmp sgt i64 [[A]], [[B:%.*]]
+; CHECK-NEXT:    br i1 [[P]], label [[BRANCH:%.*]], label [[MERGE:%.*]]
+; CHECK:       branch:
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[LHS:%.*]] = phi i64 [ 42, [[BRANCH]] ], [ [[A_MUL]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[LHS]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP8:%.*]]
+; CHECK:         [[TMP4:%.*]] = trunc i64 [[B]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[LHS]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    br label [[TMP10:%.*]]
+; CHECK:         [[TMP9:%.*]] = sdiv i64 [[LHS]], [[B]]
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:         [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ]
+; CHECK-NEXT:    store i64 [[TMP11]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a.mul = mul nsw i64 %a, 34806414968801
+  %p = icmp sgt i64 %a, %b
+  br i1 %p, label %branch, label %merge
+
+branch:
+  br label %merge
+
+merge:
+  %lhs = phi i64 [ 42, %branch ], [ %a.mul, %entry ]
+  %res = sdiv i64 %lhs, %b
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+define void @Test_bypass_mul_short_const(i64 %a, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_bypass_mul_short_const(
+; CHECK-NEXT:    [[C:%.*]] = mul i64 [[A:%.*]], -42
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[C]], [[L:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]]
+; CHECK:         [[TMP5:%.*]] = trunc i64 [[L]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[C]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = urem i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+; CHECK-NEXT:    br label [[TMP11:%.*]]
+; CHECK:         [[TMP10:%.*]] = urem i64 [[C]], [[L]]
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:         [[TMP12:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ]
+; CHECK-NEXT:    store i64 [[TMP12]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %c = mul i64 %a, -42
+  %res = urem i64 %c, %l
+  store i64 %res, i64* %retptr
+  ret void
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,106 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; We only use the div instruction -- the rem should be DCE'ed.
+; CHECK-LABEL: @div_only
+define void @div_only(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK: udiv i32
+  ; CHECK-NOT: urem
+  ; CHECK: sdiv i64
+  ; CHECK-NOT: rem
+  %d = sdiv i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}
+
+; We only use the rem instruction -- the div should be DCE'ed.
+; CHECK-LABEL: @rem_only
+define void @rem_only(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK-NOT: div
+  ; CHECK: urem i32
+  ; CHECK-NOT: div
+  ; CHECK: rem i64
+  ; CHECK-NOT: div
+  %d = srem i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}
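
A minimal sketch (illustrative names, not from the test) of the fast path the
two tests above rely on: the bypass computes the 32-bit quotient and remainder
as a pair, and whichever result the original code does not use is expected to
be cleaned up afterwards, as the comments above state.

  define void @fast_path_pair_sketch(i32 %x, i32 %y, i64* %qp, i64* %rp) {
    ; quotient and remainder are produced together on the 32-bit fast path
    %q32 = udiv i32 %x, %y
    %r32 = urem i32 %x, %y
    %q = zext i32 %q32 to i64
    %r = zext i32 %r32 to i64
    store i64 %q, i64* %qp
    store i64 %r, i64* %rp
    ret void
  }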
+
+; CHECK-LABEL: @udiv_by_constant(
+define i64 @udiv_by_constant(i32 %a) {
+; CHECK-NEXT:    [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A_ZEXT]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv i32 [[TMP1]], 50
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    ret i64 [[TMP3]]
+
+  %a.zext = zext i32 %a to i64
+  %wide.div = udiv i64 %a.zext, 50
+  ret i64 %wide.div
+}
+
+; CHECK-LABEL: @urem_by_constant(
+define i64 @urem_by_constant(i32 %a) {
+; CHECK-NEXT:    [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A_ZEXT]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = urem i32 [[TMP1]], 50
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    ret i64 [[TMP3]]
+
+  %a.zext = zext i32 %a to i64
+  %wide.div = urem i64 %a.zext, 50
+  ret i64 %wide.div
+}
+
+; Negative test: instead of emitting a runtime check on %a, we prefer to let the
+; DAGCombiner transform this division by constant into a multiplication (with a
+; "magic constant").
+;
+; CHECK-LABEL: @udiv_by_constant_negative_0(
+define i64 @udiv_by_constant_negative_0(i64 %a) {
+; CHECK-NEXT:    [[WIDE_DIV:%.*]] = udiv i64 [[A:%.*]], 50
+; CHECK-NEXT:    ret i64 [[WIDE_DIV]]
+
+  %wide.div = udiv i64 %a, 50
+  ret i64 %wide.div
+}
+
+; Negative test: while we know the dividend is short, the divisor isn't.  This
+; test is here for completeness, but instcombine will optimize this to return 0.
+;
+; CHECK-LABEL: @udiv_by_constant_negative_1(
+define i64 @udiv_by_constant_negative_1(i32 %a) {
+; CHECK-NEXT:    [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[WIDE_DIV:%.*]] = udiv i64 [[A_ZEXT]], 8589934592
+; CHECK-NEXT:    ret i64 [[WIDE_DIV]]
+
+  %a.zext = zext i32 %a to i64
+  %wide.div = udiv i64 %a.zext, 8589934592 ;; == 1 << 33
+  ret i64 %wide.div
+}
+
+; URem version of udiv_by_constant_negative_0
+;
+; CHECK-LABEL: @urem_by_constant_negative_0(
+define i64 @urem_by_constant_negative_0(i64 %a) {
+; CHECK-NEXT:    [[WIDE_DIV:%.*]] = urem i64 [[A:%.*]], 50
+; CHECK-NEXT:    ret i64 [[WIDE_DIV]]
+
+  %wide.div = urem i64 %a, 50
+  ret i64 %wide.div
+}
+
+; URem version of udiv_by_constant_negative_1
+;
+; CHECK-LABEL: @urem_by_constant_negative_1(
+define i64 @urem_by_constant_negative_1(i32 %a) {
+; CHECK-NEXT:    [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[WIDE_DIV:%.*]] = urem i64 [[A_ZEXT]], 8589934592
+; CHECK-NEXT:    ret i64 [[WIDE_DIV]]
+
+  %a.zext = zext i32 %a to i64
+  %wide.div = urem i64 %a.zext, 8589934592 ;; == 1 << 33
+  ret i64 %wide.div
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/dont-sink-nop-addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/dont-sink-nop-addrspacecast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/dont-sink-nop-addrspacecast.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/dont-sink-nop-addrspacecast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK-LABEL: @test
+define i64 @test(i1 %pred, i64* %ptr) {
+; CHECK: addrspacecast
+  %ptr_as1 = addrspacecast i64* %ptr to i64 addrspace(1)*
+  br i1 %pred, label %l1, label %l2
+l1:
+; CHECK-LABEL: l1:
+; CHECK-NOT: addrspacecast
+  %v1 = load i64, i64* %ptr
+  ret i64 %v1
+l2:
+  ; CHECK-LABEL: l2:
+  ; CHECK-NOT: addrspacecast
+  %v2 = load i64, i64 addrspace(1)* %ptr_as1
+  ret i64 %v2
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+    config.unsupported = True

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,117 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+; The following target lines are needed for the test to exercise what it should.
+; Without these lines, CodeGenPrepare does not try to sink the bitcasts.
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @f()
+
+declare void @g(i8*)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+; CodeGenPrepare will want to sink these bitcasts, but it selects the catchpad
+; blocks as the place to which the bitcast should be sunk.  Since catchpads
+; do not allow non-phi instructions before the terminator, this isn't possible. 
+
+; CHECK-LABEL: @test(
+define void @test(i32* %addr) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  %x = getelementptr i32, i32* %addr, i32 1
+  %p1 = bitcast i32* %x to i8*
+  invoke void @f()
+          to label %invoke.cont unwind label %catch1
+
+; CHECK: invoke.cont:
+; CHECK-NEXT: %y = getelementptr i32, i32* %addr, i32 2
+invoke.cont:
+  %y = getelementptr i32, i32* %addr, i32 2
+  %p2 = bitcast i32* %y to i8*
+  invoke void @f()
+          to label %done unwind label %catch2
+
+done:
+  ret void
+
+catch1:
+  %cs1 = catchswitch within none [label %handler1] unwind to caller
+
+handler1:
+  %cp1 = catchpad within %cs1 []
+  br label %catch.shared
+; CHECK: handler1:
+; CHECK-NEXT: catchpad within %cs1
+; CHECK: %[[p1:[0-9]+]] = bitcast i32* %x to i8*
+
+catch2:
+  %cs2 = catchswitch within none [label %handler2] unwind to caller
+
+handler2:
+  %cp2 = catchpad within %cs2 []
+  br label %catch.shared
+; CHECK: handler2:
+; CHECK: catchpad within %cs2
+; CHECK: %[[p2:[0-9]+]] = bitcast i32* %y to i8*
+
+; CHECK: catch.shared:
+; CHECK-NEXT: %p = phi i8* [ %[[p1]], %handler1 ], [ %[[p2]], %handler2 ]
+catch.shared:
+  %p = phi i8* [ %p1, %handler1 ], [ %p2, %handler2 ]
+  call void @g(i8* %p)
+  unreachable
+}
+
+; CodeGenPrepare will want to hoist these llvm.dbg.value calls to the phi, but
+; there is no insertion point in a catchpad block.
+
+; CHECK-LABEL: @test_dbg_value(
+define void @test_dbg_value() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  %a = alloca i8
+  %b = alloca i8
+  invoke void @f() to label %next unwind label %catch.dispatch
+next:
+  invoke void @f() to label %ret unwind label %catch.dispatch
+ret:
+  ret void
+
+catch.dispatch:
+  %p = phi i8* [%a, %entry], [%b, %next]
+  %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+  %cp1 = catchpad within %cs1 []
+  tail call void @llvm.dbg.value(metadata i8* %p, i64 0, metadata !11, metadata !13), !dbg !14
+  call void @g(i8* %p)
+  catchret from %cp1 to label %ret
+
+; CHECK: catch.dispatch:
+; CHECK-NEXT: phi i8
+; CHECK-NEXT: catchswitch
+; CHECK-NOT: llvm.dbg.value
+
+; CHECK: catch:
+; CHECK-NEXT: catchpad
+; CHECK-NEXT: call void @llvm.dbg.value
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: null)
+!1 = !DIFile(filename: "t.c", directory: "D:\5Csrc\5Cllvm\5Cbuild")
+!4 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, retainedNodes: null)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"PIC Level", i32 2}
+!10 = !{!"clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)"}
+!11 = !DILocalVariable(name: "p", scope: !4, file: !1, line: 2, type: !12)
+!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!13 = !DIExpression(DW_OP_deref)
+!14 = !DILocation(line: 2, column: 8, scope: !4)
+!15 = !DILocation(line: 3, column: 1, scope: !4)

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/computedgoto.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/computedgoto.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/computedgoto.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/computedgoto.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @use(i32) local_unnamed_addr
+declare void @useptr([2 x i8*]*) local_unnamed_addr
+
+; CHECK: @simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
+ at simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
+
+; CHECK: @multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
+ at multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
+
+; CHECK: @loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
+ at loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
+
+; CHECK: @nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
+ at nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
+
+; CHECK: @noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
+ at noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
+
+; Check that we break the critical edge when a jump table has only one use.
+define void @simple(i32* nocapture readonly %p) {
+; CHECK-LABEL: @simple(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT:    [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT:    switch i32 [[INITOP]], label [[EXIT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[BB0_CLONE:%.*]]
+; CHECK-NEXT:    i32 1, label [[BB1_CLONE:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       bb0:
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i32* [ [[PTR:%.*]], [[BB0:%.*]] ], [ [[INCDEC_PTR]], [[BB0_CLONE]] ]
+; CHECK-NEXT:    [[MERGE2:%.*]] = phi i32 [ 0, [[BB0]] ], [ [[INITVAL]], [[BB0_CLONE]] ]
+; CHECK-NEXT:    tail call void @use(i32 [[MERGE2]])
+; CHECK-NEXT:    br label [[INDIRECTGOTO:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[DOTSPLIT3:%.*]]
+; CHECK:       .split3:
+; CHECK-NEXT:    [[MERGE5:%.*]] = phi i32* [ [[PTR]], [[BB1:%.*]] ], [ [[INCDEC_PTR]], [[BB1_CLONE]] ]
+; CHECK-NEXT:    [[MERGE7:%.*]] = phi i32 [ 1, [[BB1]] ], [ [[INITVAL]], [[BB1_CLONE]] ]
+; CHECK-NEXT:    tail call void @use(i32 [[MERGE7]])
+; CHECK-NEXT:    br label [[INDIRECTGOTO]]
+; CHECK:       indirectgoto:
+; CHECK-NEXT:    [[P_ADDR_SINK:%.*]] = phi i32* [ [[MERGE5]], [[DOTSPLIT3]] ], [ [[MERGE]], [[DOTSPLIT]] ]
+; CHECK-NEXT:    [[PTR]] = getelementptr inbounds i32, i32* [[P_ADDR_SINK]], i64 1
+; CHECK-NEXT:    [[NEWP:%.*]] = load i32, i32* [[P_ADDR_SINK]], align 4
+; CHECK-NEXT:    [[IDX:%.*]] = sext i32 [[NEWP]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 [[IDX]]
+; CHECK-NEXT:    [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+; CHECK-NEXT:    indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+; CHECK:       bb0.clone:
+; CHECK-NEXT:    br label [[DOTSPLIT]]
+; CHECK:       bb1.clone:
+; CHECK-NEXT:    br label [[DOTSPLIT3]]
+;
+entry:
+  %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  %initval = load i32, i32* %p, align 4
+  %initop = load i32, i32* %incdec.ptr, align 4
+  switch i32 %initop, label %exit [
+  i32 0, label %bb0
+  i32 1, label %bb1
+  ]
+
+bb0:
+  %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
+  %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %indirectgoto ]
+  tail call void @use(i32 %opcode.0)
+  br label %indirectgoto
+
+bb1:
+  %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
+  %opcode.1 = phi i32 [ %initval, %entry ], [ 1, %indirectgoto ]
+  tail call void @use(i32 %opcode.1)
+  br label %indirectgoto
+
+indirectgoto:
+  %p.addr.sink = phi i32* [ %p.addr.1, %bb1 ], [ %p.addr.0, %bb0 ]
+  %ptr = getelementptr inbounds i32, i32* %p.addr.sink, i64 1
+  %newp = load i32, i32* %p.addr.sink, align 4
+  %idx = sext i32 %newp to i64
+  %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 %idx
+  %newop = load i8*, i8** %arrayidx, align 8
+  indirectbr i8* %newop, [label %bb0, label %bb1]
+
+exit:
+  ret void
+}
+
+; Don't try to break critical edges when several indirectbrs point to a single block.
+define void @multi(i32* nocapture readonly %p) {
+; CHECK-LABEL: @multi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT:    [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT:    switch i32 [[INITOP]], label [[EXIT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[BB0:%.*]]
+; CHECK-NEXT:    i32 1, label [[BB1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[P_ADDR_0:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY:%.*]] ], [ [[NEXT0:%.*]], [[BB0]] ], [ [[NEXT1:%.*]], [[BB1]] ]
+; CHECK-NEXT:    [[OPCODE_0:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
+; CHECK-NEXT:    tail call void @use(i32 [[OPCODE_0]])
+; CHECK-NEXT:    [[NEXT0]] = getelementptr inbounds i32, i32* [[P_ADDR_0]], i64 1
+; CHECK-NEXT:    [[NEWP0:%.*]] = load i32, i32* [[P_ADDR_0]], align 4
+; CHECK-NEXT:    [[IDX0:%.*]] = sext i32 [[NEWP0]] to i64
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX0]]
+; CHECK-NEXT:    [[NEWOP0:%.*]] = load i8*, i8** [[ARRAYIDX0]], align 8
+; CHECK-NEXT:    indirectbr i8* [[NEWOP0]], [label [[BB0]], label %bb1]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[P_ADDR_1:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY]] ], [ [[NEXT0]], [[BB0]] ], [ [[NEXT1]], [[BB1]] ]
+; CHECK-NEXT:    [[OPCODE_1:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
+; CHECK-NEXT:    tail call void @use(i32 [[OPCODE_1]])
+; CHECK-NEXT:    [[NEXT1]] = getelementptr inbounds i32, i32* [[P_ADDR_1]], i64 1
+; CHECK-NEXT:    [[NEWP1:%.*]] = load i32, i32* [[P_ADDR_1]], align 4
+; CHECK-NEXT:    [[IDX1:%.*]] = sext i32 [[NEWP1]] to i64
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX1]]
+; CHECK-NEXT:    [[NEWOP1:%.*]] = load i8*, i8** [[ARRAYIDX1]], align 8
+; CHECK-NEXT:    indirectbr i8* [[NEWOP1]], [label [[BB0]], label %bb1]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  %initval = load i32, i32* %p, align 4
+  %initop = load i32, i32* %incdec.ptr, align 4
+  switch i32 %initop, label %exit [
+  i32 0, label %bb0
+  i32 1, label %bb1
+  ]
+
+bb0:
+  %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
+  %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
+  tail call void @use(i32 %opcode.0)
+  %next0 = getelementptr inbounds i32, i32* %p.addr.0, i64 1
+  %newp0 = load i32, i32* %p.addr.0, align 4
+  %idx0 = sext i32 %newp0 to i64
+  %arrayidx0 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx0
+  %newop0 = load i8*, i8** %arrayidx0, align 8
+  indirectbr i8* %newop0, [label %bb0, label %bb1]
+
+bb1:
+  %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
+  %opcode.1 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
+  tail call void @use(i32 %opcode.1)
+  %next1 = getelementptr inbounds i32, i32* %p.addr.1, i64 1
+  %newp1 = load i32, i32* %p.addr.1, align 4
+  %idx1 = sext i32 %newp1 to i64
+  %arrayidx1 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx1
+  %newop1 = load i8*, i8** %arrayidx1, align 8
+  indirectbr i8* %newop1, [label %bb0, label %bb1]
+
+exit:
+  ret void
+}
+
+; Make sure we do the right thing for cases where the indirectbr branches back
+; to the block it terminates (i.e. the indirectbr forms a self-loop).
+define void @loop(i64* nocapture readonly %p) {
+; CHECK-LABEL: @loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    br label [[DOTSPLIT]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[BB0:%.*]] ], [ 0, [[BB0_CLONE:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[MERGE]]
+; CHECK-NEXT:    store i64 [[MERGE]], i64* [[TMP0]], align 4
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[MERGE]], 1
+; CHECK-NEXT:    [[IDX:%.*]] = srem i64 [[MERGE]], 2
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 [[IDX]]
+; CHECK-NEXT:    [[TARGET:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+; CHECK-NEXT:    indirectbr i8* [[TARGET]], [label [[BB0]], label %bb1]
+; CHECK:       bb1:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %bb0
+
+bb0:
+  %i = phi i64 [ %i.next, %bb0 ], [ 0, %entry ]
+  %tmp0 = getelementptr inbounds i64, i64* %p, i64 %i
+  store i64 %i, i64* %tmp0, align 4
+  %i.next = add nuw nsw i64 %i, 1
+  %idx = srem i64 %i, 2
+  %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 %idx
+  %target = load i8*, i8** %arrayidx, align 8
+  indirectbr i8* %target, [label %bb0, label %bb1]
+
+bb1:
+  ret void
+}
+
+; Don't do anything for cases that contain no phis.
+define void @nophi(i32* %p) {
+; CHECK-LABEL: @nophi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT:    switch i32 [[INITOP]], label [[EXIT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[BB0:%.*]]
+; CHECK-NEXT:    i32 1, label [[BB1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       bb0:
+; CHECK-NEXT:    tail call void @use(i32 0)
+; CHECK-NEXT:    br label [[INDIRECTGOTO:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    tail call void @use(i32 1)
+; CHECK-NEXT:    br label [[INDIRECTGOTO]]
+; CHECK:       indirectgoto:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to i8*
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SUNKADDR]] to i32*
+; CHECK-NEXT:    [[NEWP:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[IDX:%.*]] = sext i32 [[NEWP]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 [[IDX]]
+; CHECK-NEXT:    [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+; CHECK-NEXT:    indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
+  %initop = load i32, i32* %incdec.ptr, align 4
+  switch i32 %initop, label %exit [
+  i32 0, label %bb0
+  i32 1, label %bb1
+  ]
+
+bb0:
+  tail call void @use(i32 0)
+  br label %indirectgoto
+
+bb1:
+  tail call void @use(i32 1)
+  br label %indirectgoto
+
+indirectgoto:
+  %newp = load i32, i32* %incdec.ptr, align 4
+  %idx = sext i32 %newp to i64
+  %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 %idx
+  %newop = load i8*, i8** %arrayidx, align 8
+  indirectbr i8* %newop, [label %bb0, label %bb1]
+
+exit:
+  ret void
+}
+
+; Don't do anything if the edge isn't critical.
+define i32 @noncritical(i32 %k, i8* %p)
+; CHECK-LABEL: @noncritical(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[D:%.*]] = add i32 [[K:%.*]], 1
+; CHECK-NEXT:    indirectbr i8* [[P:%.*]], [label [[BB0:%.*]], label %bb1]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[R0:%.*]] = sub i32 [[K]], [[D]]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[R1:%.*]] = sub i32 [[D]], [[K]]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[R0]], [[BB0]] ], [ [[R1]], [[BB1:%.*]] ]
+; CHECK-NEXT:    ret i32 0
+;
+{
+entry:
+  %d = add i32 %k, 1
+  indirectbr i8* %p, [label %bb0, label %bb1]
+
+bb0:
+  %v00 = phi i32 [%k, %entry]
+  %v01 = phi i32 [%d, %entry]
+  %r0 = sub i32 %v00, %v01
+  br label %exit
+
+bb1:
+  %v10 = phi i32 [%d, %entry]
+  %v11 = phi i32 [%k, %entry]
+  %r1 = sub i32 %v10, %v11
+  br label %exit
+
+exit:
+  %v = phi i32 [%r0, %bb0], [%r1, %bb1]
+  ret i32 0
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s --check-prefix=SLOW
+; RUN: opt -S -codegenprepare -mattr=+bmi < %s | FileCheck %s --check-prefix=FAST_TZ
+; RUN: opt -S -codegenprepare -mattr=+lzcnt < %s | FileCheck %s --check-prefix=FAST_LZ
+
+target triple = "x86_64-unknown-unknown"
+target datalayout = "e-n32:64"
+
+; If the intrinsic is cheap, nothing should change.
+; If the intrinsic is expensive, check if the input is zero to avoid the call. 
+; This is undoing speculation that may have been created by SimplifyCFG + InstCombine.
+
+define i64 @cttz(i64 %A) {
+entry:
+  %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+  ret i64 %z
+
+; SLOW-LABEL: @cttz(
+; SLOW: entry:
+; SLOW:   %cmpz = icmp eq i64 %A, 0
+; SLOW:   br i1 %cmpz, label %cond.end, label %cond.false
+; SLOW: cond.false:
+; SLOW:   %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; SLOW:   br label %cond.end
+; SLOW: cond.end:
+; SLOW:   %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+; SLOW:   ret i64 %ctz
+
+; FAST_TZ-LABEL: @cttz(
+; FAST_TZ:  %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+; FAST_TZ:  ret i64 %z
+}
+
+define i64 @ctlz(i64 %A) {
+entry:
+  %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+  ret i64 %z
+
+; SLOW-LABEL: @ctlz(
+; SLOW: entry:
+; SLOW:   %cmpz = icmp eq i64 %A, 0
+; SLOW:   br i1 %cmpz, label %cond.end, label %cond.false
+; SLOW: cond.false:
+; SLOW:   %z = call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; SLOW:   br label %cond.end
+; SLOW: cond.end:
+; SLOW:   %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+; SLOW:   ret i64 %ctz
+
+; FAST_LZ-LABEL: @ctlz(
+; FAST_LZ:  %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+; FAST_LZ:  ret i64 %z
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,128 @@
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown    | FileCheck %s
+
+
+ at a = global [10 x i8] zeroinitializer, align 1
+declare void @foo()
+
+; ext(and(ld, cst)) -> and(ext(ld), ext(cst))
+define void @test1(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    zext
+; CHECK-NEXT:    and
+entry:
+  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+  %and = and i8 %tmp, 60
+  %cmp = icmp ugt i8 %and, 20
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %conv2 = zext i8 %and to i32
+  %add = add nsw i32 %conv2, %ll
+  store i32 %add, i32* %p, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  tail call void @foo()
+  ret void
+}
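
A before/after sketch of the rewrite described in the comment (hand-written,
mirroring what the CHECK lines expect): the extension is moved onto the loaded
value so it sits next to the load, and the mask is widened to match.

  ; before: zext of the masked i8 value
  define i32 @ext_and_before_sketch(i8* %p) {
    %v = load i8, i8* %p
    %m = and i8 %v, 60
    %e = zext i8 %m to i32
    ret i32 %e
  }

  ; after: the load is extended first, then masked at the wider type
  define i32 @ext_and_after_sketch(i8* %p) {
    %v = load i8, i8* %p
    %e = zext i8 %v to i32
    %m = and i32 %e, 60
    ret i32 %m
  }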
+
+; ext(or(ld, cst)) -> or(ext(ld), ext(cst))
+define void @test2(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    zext
+; CHECK-NEXT:    or
+entry:
+  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+  %or = or i8 %tmp, 60
+  %cmp = icmp ugt i8 %or, 20
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %conv2 = zext i8 %or to i32
+  %add = add nsw i32 %conv2, %ll
+  store i32 %add, i32* %p, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  tail call void @foo()
+  ret void
+}
+
+; ext(and(shl(ld, cst), cst)) -> and(shl(ext(ld), ext(cst)), ext(cst))
+define void @test3(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    zext
+; CHECK-NEXT:    shl
+; CHECK-NEXT:    and
+entry:
+  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+  %shl = shl i8 %tmp, 2
+  %and = and i8 %shl, 60
+  %cmp = icmp ugt i8 %and, 20
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %conv2 = zext i8 %and to i32
+  %add = add nsw i32 %conv2, %ll
+  store i32 %add, i32* %p, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  tail call void @foo()
+  ret void
+}
+
+; zext(lshr(ld, cst)) -> lshr(zext(ld), zext(cst))
+define void @test4(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test4
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    zext
+; CHECK-NEXT:    lshr
+entry:
+  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+  %lshr = lshr i8 %tmp, 2
+  %cmp = icmp ugt i8 %lshr, 20
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %conv2 = zext i8 %lshr to i32
+  %add = add nsw i32 %conv2, %ll
+  store i32 %add, i32* %p, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  tail call void @foo()
+  ret void
+}
+
+; ext(xor(ld, cst)) -> xor(ext(ld), ext(cst))
+define void @test5(i32* %p, i32 %ll) {
+; CHECK-LABEL: @test5
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    zext
+; CHECK-NEXT:    xor
+entry:
+  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
+  %xor = xor i8 %tmp, 60
+  %cmp = icmp ugt i8 %xor, 20
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %conv2 = zext i8 %xor to i32
+  %add = add nsw i32 %conv2, %ll
+  store i32 %add, i32* %p, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  tail call void @foo()
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,64 @@
+; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The first cast should be sunk into block2 so that the instruction selector,
+; which works one basic block at a time, can form an efficient
+; i64 * i64 -> i128 multiplication.
+define i128 @sink(i64* %mem1, i64* %mem2) {
+; CHECK-LABEL: block1:
+; CHECK-NEXT: load
+block1:
+  %l1 = load i64, i64* %mem1
+  %s1 = sext i64 %l1 to i128
+  br label %block2
+
+; CHECK-LABEL: block2:
+; CHECK-NEXT: sext
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block2:
+  %l2 = load i64, i64* %mem2
+  %s2 = sext i64 %l2 to i128
+  %res = mul i128 %s1, %s2
+  ret i128 %res
+}
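
A minimal sketch (not part of the test) of the single-block shape the sinking
is meant to produce, so that both extensions and the multiply are visible to
the block-local instruction selector at once:

  define i128 @widening_mul_sketch(i64 %x, i64 %y) {
    ; adjacent sexts feeding the mul allow a 64 x 64 -> 128 widening multiply
    %sx = sext i64 %x to i128
    %sy = sext i64 %y to i128
    %p = mul i128 %sx, %sy
    ret i128 %p
  }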
+
+; The first cast should be hoisted into block1, in order that the
+; instruction selector can form an extend-load.
+define i64 @hoist(i32* %mem1, i32* %mem2) {
+; CHECK-LABEL: block1:
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block1:
+  %l1 = load i32, i32* %mem1
+  br label %block2
+
+; CHECK-LABEL: block2:
+; CHECK-NEXT: load
+; CHECK-NEXT: sext
+block2:
+  %s1 = sext i32 %l1 to i64
+  %l2 = load i32, i32* %mem2
+  %s2 = sext i32 %l2 to i64
+  %res = mul i64 %s1, %s2
+  ret i64 %res
+}
+
+; Make sure the cast sink logic and OptimizeExtUses don't end up in an infinite
+; loop.
+define i128 @use_ext_source() {
+block1:
+  %v1 = or i64 undef, undef
+  %v2 = zext i64 %v1 to i128
+  br i1 undef, label %block2, label %block3
+
+block2:
+  %v3 = add i64 %v1, 1
+  %v4 = zext i64 %v3 to i128
+  br label %block3
+
+block3:
+  %res = phi i128 [ %v2, %block1 ], [ %v4, %block2 ]
+  ret i128 %res
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt %s -codegenprepare -mattr=+soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFP
+; RUN: opt %s -codegenprepare -mattr=-soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=HARDFP
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @foo
+; CHECK:       entry:
+; SOFTFP:      fcmp
+; HARDFP-NOT:  fcmp
+; CHECK:       body:
+; SOFTFP-NOT:  fcmp
+; HARDFP:      fcmp
+define void @foo(float %a, float %b) {
+entry:
+  %c = fcmp oeq float %a, %b
+  br label %head
+head:
+  %IND = phi i32 [ 0, %entry ], [ %IND.new, %body1 ]
+  %CMP = icmp slt i32 %IND, 1250
+  br i1 %CMP, label %body, label %tail
+body:
+  br i1 %c, label %body1, label %tail
+body1:
+  %IND.new = add i32 %IND, 1
+  br label %head
+tail:
+  ret void
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/memset_chk-simplify-nobuiltin.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt -S -disable-simplify-libcalls -codegenprepare < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This is a workaround for PR23093: when building with -mkernel/-fno-builtin,
+; we still generate fortified library calls.
+
+; Check that we ignore two things:
+; - attribute nobuiltin
+; - TLI::has (always returns false thanks to -disable-simplify-libcalls)
+
+; CHECK-NOT: _chk
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %len, i1 false)
+define void @test_nobuiltin(i8* %dst, i64 %len) {
+  call i8* @__memset_chk(i8* %dst, i32 0, i64 %len, i64 -1) nobuiltin
+  ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64, i64)

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/multi-extension.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/multi-extension.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/multi-extension.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/multi-extension.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown    | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.13.0"
+
+declare void @bar(i64)
+
+ at b = global i16 0, align 2
+
+; This test case is extracted from PR38125.
+; %or is reachable by both a sext and a zext that are going to be promoted;
+; this ensures that PromotedInsts is handled correctly.
+
+; CHECK:       %promoted = trunc i32 %or to i16
+; CHECK-NEXT:  %c = sext i16 %promoted to i64
+define i32 @foo(i16 %kkk) {
+entry:
+  %t4 = load i16, i16* @b, align 2
+  %conv4 = zext i16 %t4 to i32
+  %or = or i16 %kkk, %t4
+  %c = sext i16 %or to i64
+  call void @bar(i64 %c)
+  %t5 = and i16 %or, 5
+  %z = zext i16 %t5 to i32
+  ret i32 %z
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i1 @PR41004(i32 %x, i32 %y, i32 %t1) {
+; CHECK-LABEL: @PR41004(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = icmp eq i32 [[Y:%.*]], 1
+; CHECK-NEXT:    br i1 [[T0]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]]
+; CHECK:       select.true.sink:
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[X:%.*]], 2
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[MUL:%.*]] = phi i32 [ [[REM]], [[SELECT_TRUE_SINK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[T1:%.*]], i32 1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[MATH]], [[MUL]]
+; CHECK-NEXT:    ret i1 [[OV]]
+;
+entry:
+  %rem = srem i32 %x, 2
+  %t0 = icmp eq i32 %y, 1
+  %mul = select i1 %t0, i32 %rem, i32 0
+  %neg = add i32 %t1, -1
+  %add = add i32 %neg, %mul
+  br label %if
+
+if:
+  %tobool = icmp eq i32 %t1, 0
+  ret i1 %tobool
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+; RUN: opt -enable-debugify -codegenprepare -S < %s 2>&1 | FileCheck %s -check-prefix=DEBUG
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define i64 @uaddo1(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: @uaddo1(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT:    ret i64 [[Q]]
+;
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %a
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
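
These rewrites lean on the usual unsigned identity: a + b wraps exactly when
the truncated sum is smaller than either operand. A hand-written equivalent of
the form the CHECK lines expect (sketch only; the intrinsic declaration is
spelled out so the snippet stands alone):

  define i1 @uaddo_identity_sketch(i64 %a, i64 %b) {
    ; the overflow bit carries the same value as "icmp ult (add %a, %b), %a"
    %r = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
    %ov = extractvalue { i64, i1 } %r, 1
    ret i1 %ov
  }
  declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)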
+
+define i64 @uaddo2(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: @uaddo2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT:    ret i64 [[Q]]
+;
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+define i64 @uaddo3(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: @uaddo3(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT:    ret i64 [[Q]]
+;
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+; CHECK-LABEL: @uaddo4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
+; CHECK-NEXT:    ret i64 [[Q]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+exit:
+  ret i64 0
+}
+
+define i64 @uaddo5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
+; CHECK-LABEL: @uaddo5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    store i64 [[ADD]], i64* [[PTR:%.*]]
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[NEXT:%.*]], label [[EXIT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[B]], [[ADD]]
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[TMP0]], i64 [[B]], i64 42
+; CHECK-NEXT:    ret i64 [[Q]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %add = add i64 %b, %a
+  store i64 %add, i64* %ptr
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+exit:
+  ret i64 0
+}
+
+; When adding 1, the general pattern for add-overflow may be different due to
+; icmp canonicalization: x + 1 wraps exactly when the sum is 0 (equivalently,
+; when x was -1), so the compare ends up against a constant rather than
+; against one of the add operands.
+; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754
+
+define i1 @uaddo_i64_increment(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_increment(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %a = add i64 %x, 1
+  %ov = icmp eq i64 %a, 0
+  store i64 %a, i64* %p
+  ret i1 %ov
+}
+
+define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, i8* %p) {
+; CHECK-LABEL: @uaddo_i8_increment_noncanonical_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 1, i8 [[X:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i8 [[MATH]], i8* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %a = add i8 1, %x        ; commute
+  %ov = icmp eq i8 %a, 0
+  store i8 %a, i8* %p
+  ret i1 %ov
+}
+
+define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, i32* %p) {
+; CHECK-LABEL: @uaddo_i32_increment_noncanonical_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %a = add i32 %x, 1
+  %ov = icmp eq i32 0, %a   ; commute
+  store i32 %a, i32* %p
+  ret i1 %ov
+}
+
+define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, i16* %p) {
+; CHECK-LABEL: @uaddo_i16_increment_noncanonical_3(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 1, i16 [[X:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i16 [[MATH]], i16* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %a = add i16 1, %x        ; commute
+  %ov = icmp eq i16 0, %a   ; commute
+  store i16 %a, i16* %p
+  ret i1 %ov
+}
+
+; The overflow check may be against the input rather than the sum.
+
+define i1 @uaddo_i64_increment_alt(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_increment_alt(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %a = add i64 %x, 1
+  store i64 %a, i64* %p
+  %ov = icmp eq i64 %x, -1
+  ret i1 %ov
+}
+
+; Make sure insertion is done correctly based on dominance.
+
+define i1 @uaddo_i64_increment_alt_dom(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_increment_alt_dom(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %ov = icmp eq i64 %x, -1
+  %a = add i64 %x, 1
+  store i64 %a, i64* %p
+  ret i1 %ov
+}
+
+; The overflow check may be against the input rather than the sum.
+
+define i1 @uaddo_i64_decrement_alt(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_decrement_alt(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 -1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %a = add i64 %x, -1
+  store i64 %a, i64* %p
+  %ov = icmp ne i64 %x, 0
+  ret i1 %ov
+}
+
+; Make sure insertion is done correctly based on dominance.
+
+define i1 @uaddo_i64_decrement_alt_dom(i64 %x, i64* %p) {
+; CHECK-LABEL: @uaddo_i64_decrement_alt_dom(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 -1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %ov = icmp ne i64 %x, 0
+  %a = add i64 %x, -1
+  store i64 %a, i64* %p
+  ret i1 %ov
+}
+
+; No transform for illegal types.
+
+define i1 @uaddo_i42_increment_illegal_type(i42 %x, i42* %p) {
+; CHECK-LABEL: @uaddo_i42_increment_illegal_type(
+; CHECK-NEXT:    [[A:%.*]] = add i42 [[X:%.*]], 1
+; CHECK-NEXT:    [[OV:%.*]] = icmp eq i42 [[A]], 0
+; CHECK-NEXT:    store i42 [[A]], i42* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV]]
+;
+  %a = add i42 %x, 1
+  %ov = icmp eq i42 %a, 0
+  store i42 %a, i42* %p
+  ret i1 %ov
+}
+
+define i1 @usubo_ult_i64(i64 %x, i64 %y, i64* %p) {
+; CHECK-LABEL: @usubo_ult_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %s = sub i64 %x, %y
+  store i64 %s, i64* %p
+  %ov = icmp ult i64 %x, %y
+  ret i1 %ov
+}
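
The subtraction cases use the matching identity: x - y wraps (borrows) exactly
when x is unsigned-less-than y. A sketch of the intrinsic form these CHECK
lines expect (hand-written, self-contained):

  define i1 @usubo_identity_sketch(i64 %x, i64 %y) {
    ; the overflow bit carries the same value as "icmp ult %x, %y"
    %r = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x, i64 %y)
    %ov = extractvalue { i64, i1 } %r, 1
    ret i1 %ov
  }
  declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64)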
+
+; Verify insertion point for single-BB. Toggle predicate.
+
+define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) {
+; CHECK-LABEL: @usubo_ugt_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %ov = icmp ugt i32 %y, %x
+  %s = sub i32 %x, %y
+  store i32 %s, i32* %p
+  ret i1 %ov
+}
+
+; Constant operand should match.
+
+define i1 @usubo_ugt_constant_op0_i8(i8 %x, i8* %p) {
+; CHECK-LABEL: @usubo_ugt_constant_op0_i8(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 42, i8 [[X:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i8 [[MATH]], i8* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %s = sub i8 42, %x
+  %ov = icmp ugt i8 %x, 42
+  store i8 %s, i8* %p
+  ret i1 %ov
+}
+
+; A compare with a constant as operand 0 is canonicalized by commuting, but verify that the non-canonical form is matched.
+
+define i1 @usubo_ult_constant_op0_i16(i16 %x, i16* %p) {
+; CHECK-LABEL: @usubo_ult_constant_op0_i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 43, i16 [[X:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i16 [[MATH]], i16* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %s = sub i16 43, %x
+  %ov = icmp ult i16 43, %x
+  store i16 %s, i16* %p
+  ret i1 %ov
+}
+
+; Subtract with constant operand 1 is canonicalized to add.
+
+define i1 @usubo_ult_constant_op1_i16(i16 %x, i16* %p) {
+; CHECK-LABEL: @usubo_ult_constant_op1_i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 [[X:%.*]], i16 44)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i16, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i16 [[MATH]], i16* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %s = add i16 %x, -44
+  %ov = icmp ult i16 %x, 44
+  store i16 %s, i16* %p
+  ret i1 %ov
+}
+
+define i1 @usubo_ugt_constant_op1_i8(i8 %x, i8* %p) {
+; CHECK-LABEL: @usubo_ugt_constant_op1_i8(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 45)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i8, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i8, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i8 [[MATH]], i8* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %ov = icmp ugt i8 45, %x
+  %s = add i8 %x, -45
+  store i8 %s, i8* %p
+  ret i1 %ov
+}
+
+; Special-case: subtract 1 changes the compare predicate and constant.
+
+define i1 @usubo_eq_constant1_op1_i32(i32 %x, i32* %p) {
+; CHECK-LABEL: @usubo_eq_constant1_op1_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %s = add i32 %x, -1
+  %ov = icmp eq i32 %x, 0
+  store i32 %s, i32* %p
+  ret i1 %ov
+}
+
+; Special-case: subtract from 0 (negate) changes the compare predicate.
+
+define i1 @usubo_ne_constant0_op1_i32(i32 %x, i32* %p) {
+; CHECK-LABEL: @usubo_ne_constant0_op1_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 0, i32 [[X:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i32 [[MATH]], i32* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+  %s = sub i32 0, %x
+  %ov = icmp ne i32 %x, 0
+  store i32 %s, i32* %p
+  ret i1 %ov
+}
+
+; Verify insertion point for multi-BB.
+
+declare void @call(i1)
+
+define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) {
+; CHECK-LABEL: @usubo_ult_sub_dominates_i64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       t:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    br i1 [[COND]], label [[END:%.*]], label [[F]]
+; CHECK:       f:
+; CHECK-NEXT:    ret i1 [[COND]]
+; CHECK:       end:
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+entry:
+  br i1 %cond, label %t, label %f
+
+t:
+  %s = sub i64 %x, %y
+  store i64 %s, i64* %p
+  br i1 %cond, label %end, label %f
+
+f:
+  ret i1 %cond
+
+end:
+  %ov = icmp ult i64 %x, %y
+  ret i1 %ov
+}
+
+define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, i64* %p, i1 %cond) {
+; CHECK-LABEL: @usubo_ult_cmp_dominates_i64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       t:
+; CHECK-NEXT:    [[OV:%.*]] = icmp ult i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @call(i1 [[OV]])
+; CHECK-NEXT:    br i1 [[OV]], label [[END:%.*]], label [[F]]
+; CHECK:       f:
+; CHECK-NEXT:    ret i1 [[COND]]
+; CHECK:       end:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    store i64 [[MATH]], i64* [[P:%.*]]
+; CHECK-NEXT:    ret i1 [[OV1]]
+;
+entry:
+  br i1 %cond, label %t, label %f
+
+t:
+  %ov = icmp ult i64 %x, %y
+  call void @call(i1 %ov)
+  br i1 %ov, label %end, label %f
+
+f:
+  ret i1 %cond
+
+end:
+  %s = sub i64 %x, %y
+  store i64 %s, i64* %p
+  ret i1 %ov
+}
+
+; Verify that crazy/non-canonical code does not crash.
+
+define void @bar() {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 1, -1
+; CHECK-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT:    unreachable
+;
+  %cmp = icmp eq i64 1, -1
+  %frombool = zext i1 %cmp to i8
+  unreachable
+}
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i64 1, 1
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[SUB]] to i32
+; CHECK-NEXT:    unreachable
+;
+  %sub = add nsw i64 1, 1
+  %conv = trunc i64 %sub to i32
+  unreachable
+}
+
+; Similarly for usubo.
+
+define i1 @bar2() {
+; CHECK-LABEL: @bar2(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 1, 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %cmp = icmp eq i64 1, 0
+  ret i1 %cmp
+}
+
+define i64 @foo2(i8 *%p) {
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i64 1, -1
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %sub = add nsw i64 1, -1
+  ret i64 %sub
+}
+
+; Avoid hoisting a math op into a dominating block which would
+; increase the critical path.
+
+define void @PR41129(i64* %p64) {
+; CHECK-LABEL: @PR41129(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[KEY:%.*]] = load i64, i64* [[P64:%.*]], align 8
+; CHECK-NEXT:    [[COND17:%.*]] = icmp eq i64 [[KEY]], 0
+; CHECK-NEXT:    br i1 [[COND17]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK:       false:
+; CHECK-NEXT:    [[ANDVAL:%.*]] = and i64 [[KEY]], 7
+; CHECK-NEXT:    store i64 [[ANDVAL]], i64* [[P64]]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       true:
+; CHECK-NEXT:    [[SVALUE:%.*]] = add i64 [[KEY]], -1
+; CHECK-NEXT:    store i64 [[SVALUE]], i64* [[P64]]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %key = load i64, i64* %p64, align 8
+  %cond17 = icmp eq i64 %key, 0
+  br i1 %cond17, label %true, label %false
+
+false:
+  %andval = and i64 %key, 7
+  store i64 %andval, i64* %p64
+  br label %exit
+
+true:
+  %svalue = add i64 %key, -1
+  store i64 %svalue, i64* %p64
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that every instruction inserted by -codegenprepare has a debug location.
+; DEBUG: CheckModuleDebugify: PASS
+
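As a hand-written sketch (not part of the patch; the function and value names are invented for illustration), this is the overflow-intrinsic shape the CHECK lines above expect: a separate sub/icmp-ult pair over the same operands collapses into one @llvm.usub.with.overflow call whose two results replace them.

define i1 @usubo_sketch(i64 %x, i64 %y, i64* %p) {
  ; before the transform the body would be:
  ;   %s  = sub i64 %x, %y
  ;   %ov = icmp ult i64 %x, %y
  ; after the transform both results come from a single intrinsic call:
  %t = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x, i64 %y)
  %math = extractvalue { i64, i1 } %t, 0
  %ov = extractvalue { i64, i1 } %t, 1
  store i64 %math, i64* %p
  ret i1 %ov
}

declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64)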

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/pr27536.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/pr27536.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/pr27536.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/pr27536.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+ at rtti = external global i8
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  %e = alloca i8
+  %tmpcast = bitcast i8* %e to i16*
+  invoke void @_CxxThrowException(i8* null, i8* null)
+          to label %catchret.dest unwind label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %entry
+  %0 = catchswitch within none [label %catch] unwind to caller
+
+catch:                                            ; preds = %catch.dispatch
+  %1 = catchpad within %0 [i8* @rtti, i32 0, i16* %tmpcast]
+  catchret from %1 to label %catchret.dest
+
+catchret.dest:                                    ; preds = %catch
+  ret void
+}
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[alloca:.*]] = alloca i8
+; CHECK-NEXT: %[[bc:.*]] = bitcast i8* %[[alloca]] to i16*
+
+; CHECK: catchpad within {{.*}} [i8* @rtti, i32 0, i16* %[[bc]]]
+
+declare void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/pr35658.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/pr35658.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/pr35658.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/pr35658.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true  %s | FileCheck %s
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+define void @f2() {
+entry:
+  %arraydecay = getelementptr inbounds [2 x i16], [2 x i16]* undef, i16 0, i16 0
+  %arrayidx1 = getelementptr inbounds [2 x i16], [2 x i16]* undef, i16 0, i16 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %e.03 = phi i16* [ %arraydecay, %entry ], [ %arrayidx1, %for.body ]
+  %tobool = icmp eq i16 undef, 0
+  br i1 undef, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+; CHECK: sunkaddr
+  %e.1.le = select i1 %tobool, i16* %arrayidx1, i16* %e.03
+  store i16 0, i16* %e.1.le, align 1
+  ret void
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/select.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/select.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/select.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,205 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+; RUN: opt -debugify -codegenprepare -S < %s | FileCheck %s -check-prefix=DEBUG
+
+target triple = "x86_64-unknown-unknown"
+
+; Nothing to sink and convert here.
+
+define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y)  {
+; CHECK-LABEL: @no_sink(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOAD:%.*]] = load double, double* [[B:%.*]], align 8
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt double [[LOAD]], [[A:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 [[X:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+entry:
+  %load = load double, double* %b, align 8
+  %cmp = fcmp olt double %load, %a
+  %sel = select i1 %cmp, i32 %x, i32 %y
+  ret i32 %sel
+}
+
+
+; An 'fdiv' is expensive, so sink it rather than speculatively execute it.
+
+define float @fdiv_true_sink(float %a, float %b) {
+; CHECK-LABEL: @fdiv_true_sink(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 1.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]]
+; CHECK:       select.true.sink:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]]
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[SEL:%.*]] = phi float [ [[DIV]], [[SELECT_TRUE_SINK]] ], [ 2.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret float [[SEL]]
+;
+; DEBUG-LABEL: @fdiv_true_sink(
+; DEBUG-NEXT:  entry:
+; DEBUG-NEXT:    [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 1.000000e+00
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata i1 [[CMP]]
+; DEBUG-NEXT:    br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !dbg
+; DEBUG:       select.true.sink:
+; DEBUG-NEXT:    [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]]
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata float [[DIV]]
+; DEBUG-NEXT:    br label [[SELECT_END]], !dbg
+; DEBUG:       select.end:
+; DEBUG-NEXT:    [[SEL:%.*]] = phi float [ [[DIV]], [[SELECT_TRUE_SINK]] ], [ 2.000000e+00, [[ENTRY:%.*]] ], !dbg
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata float [[SEL]]
+; DEBUG-NEXT:    ret float [[SEL]]
+;
+entry:
+  %div = fdiv float %a, %b
+  %cmp = fcmp ogt float %a, 1.0
+  %sel = select i1 %cmp, float %div, float 2.0
+  ret float %sel
+}
+
+define float @fdiv_false_sink(float %a, float %b) {
+; CHECK-LABEL: @fdiv_false_sink(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 3.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[SELECT_END:%.*]], label [[SELECT_FALSE_SINK:%.*]]
+; CHECK:       select.false.sink:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]]
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[SEL:%.*]] = phi float [ 4.000000e+00, [[ENTRY:%.*]] ], [ [[DIV]], [[SELECT_FALSE_SINK]] ]
+; CHECK-NEXT:    ret float [[SEL]]
+;
+; DEBUG-LABEL: @fdiv_false_sink(
+; DEBUG-NEXT:  entry:
+; DEBUG-NEXT:    [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 3.000000e+00
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata i1 [[CMP]]
+; DEBUG-NEXT:    br i1 [[CMP]], label [[SELECT_END:%.*]], label [[SELECT_FALSE_SINK:%.*]], !dbg
+; DEBUG:       select.false.sink:
+; DEBUG-NEXT:    [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]]
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata float [[DIV]]
+; DEBUG-NEXT:    br label [[SELECT_END]], !dbg
+; DEBUG:       select.end:
+; DEBUG-NEXT:    [[SEL:%.*]] = phi float [ 4.000000e+00, [[ENTRY:%.*]] ], [ [[DIV]], [[SELECT_FALSE_SINK]] ], !dbg
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata float [[SEL]]
+; DEBUG-NEXT:    ret float [[SEL]], !dbg
+;
+entry:
+  %div = fdiv float %a, %b
+  %cmp = fcmp ogt float %a, 3.0
+  %sel = select i1 %cmp, float 4.0, float %div
+  ret float %sel
+}
+
+define float @fdiv_both_sink(float %a, float %b) {
+; CHECK-LABEL: @fdiv_both_sink(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[A:%.*]], 5.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]]
+; CHECK:       select.true.sink:
+; CHECK-NEXT:    [[DIV1:%.*]] = fdiv float [[A]], [[B:%.*]]
+; CHECK-NEXT:    br label [[SELECT_END:%.*]]
+; CHECK:       select.false.sink:
+; CHECK-NEXT:    [[DIV2:%.*]] = fdiv float [[B]], [[A]]
+; CHECK-NEXT:    br label [[SELECT_END]]
+; CHECK:       select.end:
+; CHECK-NEXT:    [[SEL:%.*]] = phi float [ [[DIV1]], [[SELECT_TRUE_SINK]] ], [ [[DIV2]], [[SELECT_FALSE_SINK]] ]
+; CHECK-NEXT:    ret float [[SEL]]
+;
+entry:
+  %div1 = fdiv float %a, %b
+  %div2 = fdiv float %b, %a
+  %cmp = fcmp ogt float %a, 5.0
+  %sel = select i1 %cmp, float %div1, float %div2
+  ret float %sel
+}
+
+; But if the select is marked unpredictable, then don't turn it into a branch.
+
+define float @unpredictable_select(float %a, float %b) {
+; CHECK-LABEL: @unpredictable_select(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[A]], 1.000000e+00
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], float [[DIV]], float 2.000000e+00, !unpredictable !0
+; CHECK-NEXT:    ret float [[SEL]]
+;
+entry:
+  %div = fdiv float %a, %b
+  %cmp = fcmp ogt float %a, 1.0
+  %sel = select i1 %cmp, float %div, float 2.0, !unpredictable !0
+  ret float %sel
+}
+
+!0 = !{}
+
+; An 'fadd' is not too expensive, so it's ok to speculate.
+
+define float @fadd_no_sink(float %a, float %b) {
+; CHECK-LABEL: @fadd_no_sink(
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float 6.000000e+00, [[A]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 7.000000e+00
+; CHECK-NEXT:    ret float [[SEL]]
+;
+  %add = fadd float %a, %b
+  %cmp = fcmp ogt float 6.0, %a
+  %sel = select i1 %cmp, float %add, float 7.0
+  ret float %sel
+}
+
+; Possible enhancement: sinkability is only computed for the direct operand
+; of the select, so we don't try to sink this. The fdiv cost is not
+; taken into account.
+
+define float @fdiv_no_sink(float %a, float %b) {
+; CHECK-LABEL: @fdiv_no_sink(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[DIV]], [[B]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[A]], 1.000000e+00
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 8.000000e+00
+; CHECK-NEXT:    ret float [[SEL]]
+;
+entry:
+  %div = fdiv float %a, %b
+  %add = fadd float %div, %b
+  %cmp = fcmp ogt float %a, 1.0
+  %sel = select i1 %cmp, float %add, float 8.0
+  ret float %sel
+}
+
+; Do not transform the CFG if the select operands may have side effects.
+
+declare i64* @bar(i32, i32, i32)
+declare i64* @baz(i32, i32, i32)
+
+define i64* @calls_no_sink(i32 %in) {
+; CHECK-LABEL: @calls_no_sink(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i64* @bar(i32 1, i32 2, i32 3)
+; CHECK-NEXT:    [[CALL2:%.*]] = call i64* @baz(i32 1, i32 2, i32 3)
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[IN:%.*]], 0
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[TOBOOL]], i64* [[CALL1]], i64* [[CALL2]]
+; CHECK-NEXT:    ret i64* [[SEL]]
+;
+  %call1 = call i64* @bar(i32 1, i32 2, i32 3)
+  %call2 = call i64* @baz(i32 1, i32 2, i32 3)
+  %tobool = icmp ne i32 %in, 0
+  %sel = select i1 %tobool, i64* %call1, i64* %call2
+  ret i64* %sel
+}
+
+define i32 @sdiv_no_sink(i32 %a, i32 %b) {
+; CHECK-LABEL: @sdiv_no_sink(
+; CHECK-NEXT:    [[DIV1:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[DIV2:%.*]] = sdiv i32 [[B]], [[A]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A]], 5
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i32 [[DIV1]], i32 [[DIV2]]
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+  %div1 = sdiv i32 %a, %b
+  %div2 = sdiv i32 %b, %a
+  %cmp = icmp sgt i32 %a, 5
+  %sel = select i1 %cmp, i32 %div1, i32 %div2
+  ret i32 %sel
+}
+
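As a minimal sketch (hand-written for illustration, assuming the same -codegenprepare pipeline as the RUN lines above; names are not taken from the pass), this is the sunk form the fdiv tests check for: the expensive operand moves into its own block and rejoins through a phi, so it only executes on the path where the select would have picked it.

define float @fdiv_sink_sketch(float %a, float %b) {
entry:
  %cmp = fcmp ogt float %a, 1.000000e+00
  br i1 %cmp, label %select.true.sink, label %select.end

select.true.sink:
  ; the fdiv is only evaluated when %cmp is true
  %div = fdiv float %a, %b
  br label %select.end

select.end:
  %sel = phi float [ %div, %select.true.sink ], [ 2.000000e+00, %entry ]
  ret float %sel
}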

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,543 @@
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true  %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-YES
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NO
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Can we sink for a different base if there is no phi for the base?
+define i32 @test1(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test1
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+  %c = phi i32* [%c1, %entry], [%c2, %if.then]
+  %v = load i32, i32* %c, align 4
+  ret i32 %v
+}
+
+; Can we sink for a different base if there is a phi for the base?
+define i32 @test2(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test2
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %b = phi i64* [%b1, %entry], [%b2, %if.then]
+  %c = phi i32* [%c1, %entry], [%c2, %if.then]
+  %v = load i32, i32* %c, align 4
+  ret i32 %v
+}
+
+; Can we sink for a different base if there is a phi for the base, but not a valid one?
+define i32 @test3(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test3
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+  %b = phi i64* [%b2, %entry], [%b1, %if.then]
+  %c = phi i32* [%c1, %entry], [%c2, %if.then]
+  %v = load i32, i32* %c, align 4
+  ret i32 %v
+}
+
+; Can we sink for a different base if both addresses are in the same block?
+define i32 @test4(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test4
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+  %c = phi i32* [%c1, %entry], [%c2, %if.then]
+  %v = load i32, i32* %c, align 4
+  ret i32 %v
+}
+
+; Can we sink for a different base if there is a phi for the base?
+; Both addresses are in the same block.
+define i32 @test5(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test5
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  br label %fallthrough
+
+fallthrough:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %b = phi i64* [%b1, %entry], [%b2, %if.then]
+  %c = phi i32* [%c1, %entry], [%c2, %if.then]
+  %v = load i32, i32* %c, align 4
+  ret i32 %v
+}
+
+; Can we sink for a different base if there is a phi for the base, but not a valid one?
+; Both addresses are in the same block.
+define i32 @test6(i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test6
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: load
+  %b = phi i64* [%b2, %entry], [%b1, %if.then]
+  %c = phi i32* [%c1, %entry], [%c2, %if.then]
+  %v = load i32, i32* %c, align 4
+  ret i32 %v
+}
+
+; Case with a loop. No phi node.
+define i32 @test7(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test7
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-YES: sunk_phi
+  %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough]
+  %c3 = phi i32* [%c1, %entry], [%c, %fallthrough]
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+  %c = phi i32* [%c3, %loop], [%c2, %if.then]
+  %v = load volatile i32, i32* %c, align 4
+  %iv.inc = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.inc, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %v
+}
+
+; Case with a loop. There is a phi node.
+define i32 @test8(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test8
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough]
+  %c3 = phi i32* [%c1, %entry], [%c, %fallthrough]
+  %b3 = phi i64* [%b1, %entry], [%b, %fallthrough]
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %c = phi i32* [%c3, %loop], [%c2, %if.then]
+  %b = phi i64* [%b3, %loop], [%b2, %if.then]
+  %v = load volatile i32, i32* %c, align 4
+  %iv.inc = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.inc, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %v
+}
+
+; Case with a loop. There is a phi node, but it does not fit.
+define i32 @test9(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test9
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-YES: sunk_phi
+  %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough]
+  %c3 = phi i32* [%c1, %entry], [%c, %fallthrough]
+  %b3 = phi i64* [%b1, %entry], [%b2, %fallthrough]
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: load
+  %c = phi i32* [%c3, %loop], [%c2, %if.then]
+  %b = phi i64* [%b3, %loop], [%b2, %if.then]
+  %v = load volatile i32, i32* %c, align 4
+  %iv.inc = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.inc, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %v
+}
+
+; Case through a loop. No phi node.
+define i32 @test10(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test10
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: br
+  %c = phi i32* [%c1, %entry], [%c2, %if.then]
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop]
+  %iv.inc = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.inc, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+; CHECK-YES: sunkaddr
+  %v = load volatile i32, i32* %c, align 4
+  ret i32 %v
+}
+
+; Case through a loop. There is a phi.
+define i32 @test11(i32 %N, i1 %cond, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test11
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK: phi
+; CHECK: phi
+; CHECK: br
+  %c = phi i32* [%c1, %entry], [%c2, %if.then]
+  %b = phi i64* [%b1, %entry], [%b2, %if.then]
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop]
+  %iv.inc = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.inc, %N
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+; CHECK: sunkaddr
+  %v = load volatile i32, i32* %c, align 4
+  ret i32 %v
+}
+
+; Complex case with an address value from the previous iteration.
+define i32 @test12(i32 %N, i1 %cond, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test12
+entry:
+  %a1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %c1 = bitcast i64* %a1 to i32*
+  br label %loop
+
+loop:
+; CHECK-LABEL: loop:
+; CHECK-YES: sunk_phi
+; CHECK-NO: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: br
+  %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
+  %c3 = phi i32* [%c1, %entry], [%c, %backedge]
+  %b4 = phi i64* [%b1, %entry], [%b5, %backedge]
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %a2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %c2 = bitcast i64* %a2 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK-LABEL: fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO: phi
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: load
+  %c = phi i32* [%c3, %loop], [%c2, %if.then]
+  %b6 = phi i64* [%b4, %loop], [%b2, %if.then]
+  %v = load volatile i32, i32* %c, align 4
+  %a4 = getelementptr inbounds i64, i64* %b4, i64 5
+  %c4 = bitcast i64* %a4 to i32*
+  %cmp = icmp slt i32 %iv, 20
+  br i1 %cmp, label %backedge, label %if.then.2
+
+if.then.2:
+  br label %backedge
+
+backedge:
+  %b5 = phi i64* [%b4, %fallthrough], [%b6, %if.then.2]
+  %iv.inc = add i32 %iv, 1
+  %cmp2 = icmp slt i32 %iv.inc, %N
+  br i1 %cmp2, label %loop, label %exit
+
+exit:
+  ret i32 %v
+}
+
+%struct.S = type {i32, i32}
+; Case with an index.
+define i32 @test13(i1 %cond, %struct.S* %b1, %struct.S* %b2, i64 %Index) {
+; CHECK-LABEL: @test13
+entry:
+  %a1 = getelementptr inbounds %struct.S, %struct.S* %b1, i64 %Index, i32 1
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %i2 = mul i64 %Index, 2
+  %a2 = getelementptr inbounds %struct.S, %struct.S* %b2, i64 %Index, i32 1
+  br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO-NEXT: phi
+; CHECK-NO-NEXT: load
+  %a = phi i32* [%a1, %entry], [%a2, %if.then]
+  %v = load i32, i32* %a, align 4
+  ret i32 %v
+}
+
+; Select of Select case.
+define i64 @test14(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test14
+entry:
+; CHECK-LABEL: entry:
+  %g1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %g2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %g3 = getelementptr inbounds i64, i64* %b3, i64 5
+  %s1 = select i1 %c1, i64* %g1, i64* %g2
+  %s2 = select i1 %c2, i64* %s1, i64* %g3
+; CHECK: sunkaddr
+  %v = load i64 , i64* %s2, align 8
+  ret i64 %v
+}
+
+; Select of Phi case.
+define i64 @test15(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test15
+entry:
+  %g1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %g2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %g3 = getelementptr inbounds i64, i64* %b3, i64 5
+  br i1 %c1, label %if.then, label %fallthrough
+
+if.then:
+  br label %fallthrough
+
+fallthrough:
+; CHECK-LABEL: fallthrough:
+  %p1 = phi i64* [%g1, %entry], [%g2, %if.then]
+  %s1 = select i1 %c2, i64* %p1, i64* %g3
+; CHECK-YES: sunkaddr
+; CHECK-NO: phi
+; CHECK-NO-NEXT: select
+; CHECK-NO-NEXT: load
+  %v = load i64 , i64* %s1, align 8
+  ret i64 %v
+}
+
+; Select of Phi case. The phi already exists.
+define i64 @test16(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test16
+entry:
+  %g1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %g2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %g3 = getelementptr inbounds i64, i64* %b3, i64 5
+  br i1 %c1, label %if.then, label %fallthrough
+
+if.then:
+  br label %fallthrough
+
+fallthrough:
+; CHECK-LABEL: fallthrough:
+  %p = phi i64* [%b1, %entry], [%b2, %if.then]
+  %p1 = phi i64* [%g1, %entry], [%g2, %if.then]
+  %s1 = select i1 %c2, i64* %p1, i64* %g3
+; CHECK: sunkaddr
+  %v = load i64 , i64* %s1, align 8
+  ret i64 %v
+}
+
+; Phi of Select case.
+define i64 @test17(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) {
+; CHECK-LABEL: @test17
+entry:
+  %g1 = getelementptr inbounds i64, i64* %b1, i64 5
+  %g2 = getelementptr inbounds i64, i64* %b2, i64 5
+  %g3 = getelementptr inbounds i64, i64* %b3, i64 5
+  %s1 = select i1 %c2, i64* %g1, i64* %g2
+  br i1 %c1, label %if.then, label %fallthrough
+
+if.then:
+  br label %fallthrough
+
+fallthrough:
+; CHECK-LABEL: fallthrough:
+  %p1 = phi i64* [%s1, %entry], [%g3, %if.then]
+; CHECK-YES: sunkaddr
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+  %v = load i64 , i64* %p1, align 8
+  ret i64 %v
+}
+
+; The same two addr modes reached by different paths.
+define i32 @test18(i1 %cond1, i1 %cond2, i64* %b1, i64* %b2) {
+; CHECK-LABEL: @test18
+entry:
+  %g1 = getelementptr inbounds i64, i64* %b2, i64 5
+  %bc1 = bitcast i64* %g1 to i32*
+  br i1 %cond1, label %if.then1, label %if.then2
+
+if.then1:
+  %g2 = getelementptr inbounds i64, i64* %b1, i64 5
+  %bc2 = bitcast i64* %g2 to i32*
+  br label %fallthrough
+
+if.then2:
+  %bc1_1 = bitcast i64* %g1 to i32*
+  br i1 %cond2, label %fallthrough, label %if.then3
+
+if.then3:
+  %bc1_2 = bitcast i64* %g1 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK-YES: sunk_phi
+; CHECK-NO-LABEL: fallthrough:
+; CHECK-NO: phi
+; CHECK-NO-NEXT: load
+  %c = phi i32* [%bc2, %if.then1], [%bc1_1, %if.then2], [%bc1_2, %if.then3]
+  %v1 = load i32, i32* %c, align 4
+  %g1_1 = getelementptr inbounds i64, i64* %b2, i64 5
+  %bc1_1_1 = bitcast i64* %g1_1 to i32*
+  %v2 = load i32, i32* %bc1_1_1, align 4
+  %v = add i32 %v1, %v2
+  ret i32 %v
+}
+
+; Different types but null is the first?
+define i32 @test19(i1 %cond1, i1 %cond2, i64* %b2, i8* %b1) {
+; CHECK-LABEL: @test19
+entry:
+  %g1 = getelementptr inbounds i64, i64* %b2, i64 5
+  %bc1 = bitcast i64* %g1 to i32*
+  br i1 %cond1, label %if.then1, label %if.then2
+
+if.then1:
+  %g2 = getelementptr inbounds i8, i8* %b1, i64 40
+  %bc2 = bitcast i8* %g2 to i32*
+  br label %fallthrough
+
+if.then2:
+  %bc1_1 = bitcast i64* %g1 to i32*
+  br i1 %cond2, label %fallthrough, label %if.then3
+
+if.then3:
+  %g3 = getelementptr inbounds i64, i64* null, i64 5
+  %bc1_2 = bitcast i64* %g3 to i32*
+  br label %fallthrough
+
+fallthrough:
+; CHECK-NOT: sunk_phi
+  %c = phi i32* [%bc2, %if.then1], [%bc1_1, %if.then2], [%bc1_2, %if.then3]
+  %v1 = load i32, i32* %c, align 4
+  %g1_1 = getelementptr inbounds i64, i64* %b2, i64 5
+  %bc1_1_1 = bitcast i64* %g1_1 to i32*
+  %v2 = load i32, i32* %bc1_1_1, align 4
+  %v = add i32 %v1, %v2
+  ret i32 %v
+}
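As a sketch only (the exact names the pass emits are an assumption here; the CHECK-YES lines merely grep for the sunk_phi substring), the new-phi mode is expected to rebuild the address phi as a phi over the two bases plus a single folded byte-offset GEP next to the load, roughly:

define i32 @sunk_phi_sketch(i1 %cond, i64* %b1, i64* %b2) {
entry:
  br i1 %cond, label %if.then, label %fallthrough

if.then:
  br label %fallthrough

fallthrough:
  ; one phi over the bases instead of two GEPs feeding a pointer phi
  %sunk_phi = phi i64* [ %b1, %entry ], [ %b2, %if.then ]
  %raw = bitcast i64* %sunk_phi to i8*
  ; the common offset (5 x i64 = 40 bytes) is folded into one byte GEP
  %sunkaddr = getelementptr inbounds i8, i8* %raw, i64 40
  %casted = bitcast i8* %sunkaddr to i32*
  %v = load i32, i32* %casted, align 4
  ret i32 %v
}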

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-select=true  %s | FileCheck %s --check-prefix=CHECK
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Select when both offset and scale reg are present.
+define i64 @test1(i1 %c, i64* %b, i64 %scale) {
+; CHECK-LABEL: @test1
+entry:
+; CHECK-LABEL: entry:
+  %g = getelementptr inbounds i64, i64* %b, i64 %scale
+  %g1 = getelementptr inbounds i64, i64* %g, i64 8
+  %g2 = getelementptr inbounds i64, i64* %g, i64 16
+  %s = select i1 %c, i64* %g1, i64* %g2
+; CHECK-NOT: sunkaddr
+  %v = load i64 , i64* %s, align 8
+  ret i64 %v
+}
+
+ at gv1 = external global i8, align 16
+ at gv2 = external global i8, align 16
+
+; Select when both GV and base reg are present.
+define i8 @test2(i1 %c, i64 %b) {
+; CHECK-LABEL: @test2
+entry:
+; CHECK-LABEL: entry:
+  %g1 = getelementptr inbounds i8, i8* @gv1, i64 %b
+  %g2 = getelementptr inbounds i8, i8* @gv2, i64 %b
+  %s = select i1 %c, i8* %g1, i8* %g2
+; CHECK-NOT: sunkaddr
+  %v = load i8 , i8* %s, align 8
+  ret i8 %v
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false  %s | FileCheck %s --check-prefix=CHECK
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test() {
+entry:
+  %0 = getelementptr inbounds i64, i64 * null, i64 undef
+  br label %start
+
+start:
+  %val1 = phi i64 * [ %0, %entry ], [ %val4, %exit ]
+  %val2 = phi i64 * [ null, %entry ], [ %val5, %exit ]
+  br i1 false, label %slowpath, label %exit
+
+slowpath:
+  %elem1 = getelementptr inbounds i64, i64 * undef, i64 undef
+  br label %exit
+
+exit:
+; CHECK: sunkaddr
+  %val3 = phi i64 * [ undef, %slowpath ], [ %val2, %start ]
+  %val4 = phi i64 * [ %elem1, %slowpath ], [ %val1, %start ]
+  %val5 = phi i64 * [ undef, %slowpath ], [ %val2, %start ]
+  %loadx = load i64, i64 * %val4, align 8
+  br label %start
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,280 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at x = external global [1 x [2 x <4 x float>]]
+
+; Can we sink a single addressing-mode computation to its use?
+define void @test1(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test1
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %v = load i32, i32* %casted, align 4
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}
+
+declare void @foo(i32)
+
+; Make sure sinking two copies of the addressing mode into different blocks works.
+define void @test2(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test2
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %next, label %fallthrough
+
+next:
+; CHECK-LABEL: next:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v2 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v2)
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}
+
+; If we have two loads in the same block, we only need one copy of the
+; addressing mode - instruction selection will duplicate it if needed.
+define void @test3(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test3
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
+  %v2 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v2)
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}
+
+; Can we still sink the addressing mode if there's a cold use of the
+; address itself?
+define void @test4(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test4
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+  ret void
+
+rare.1:
+; CHECK-LABEL: rare.1:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  call void @slowpath(i32 %v1, i32* %casted) cold
+  br label %fallthrough
+}
+
+; Negative test - we don't want to duplicate the addressing into the hot path.
+define void @test5(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test5
+entry:
+; CHECK: %addr = getelementptr inbounds
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+  ret void
+
+rare.1:
+  call void @slowpath(i32 %v1, i32* %casted) ;; NOT COLD
+  br label %fallthrough
+}
+
+; Negative test - opt for size
+define void @test6(i1 %cond, i64* %base) minsize {
+; CHECK-LABEL: @test6
+entry:
+; CHECK: %addr = getelementptr
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+  ret void
+
+rare.1:
+  call void @slowpath(i32 %v1, i32* %casted) cold
+  br label %fallthrough
+}
+
+
+; Make sure sinking two copies of the addressing mode into different blocks works
+; when there are cold paths for each.
+define void @test7(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test7
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v1 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v1)
+  %cmp = icmp eq i32 %v1, 0
+  br i1 %cmp, label %rare.1, label %next
+
+next:
+; CHECK-LABEL: next:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %v2 = load i32, i32* %casted, align 4
+  call void @foo(i32 %v2)
+  %cmp2 = icmp eq i32 %v2, 0
+  br i1 %cmp2, label %rare.1, label %fallthrough
+
+fallthrough:
+  ret void
+
+rare.1:
+; CHECK-LABEL: rare.1:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  call void @slowpath(i32 %v1, i32* %casted) cold
+  br label %next
+
+rare.2:
+; CHECK-LABEL: rare.2:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  call void @slowpath(i32 %v2, i32* %casted) cold
+  br label %fallthrough
+}
+
+declare void @slowpath(i32, i32*)
+
+; Make sure we don't end up in an infinite loop after we fail to sink.
+; CHECK-LABEL: define void @test8
+; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+define void @test8() {
+allocas:
+  %aFOO_load = load float*, float** undef
+  %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
+  %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
+  %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
+  %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+  br label %load.i145
+
+load.i145:
+  %ptr.i143 = bitcast i8* %ptr to <4 x float>*
+  %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
+  %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
+  br label %pl_loop.i.i122
+
+pl_loop.i.i122:
+  br label %pl_loop.i.i122
+}
+
+; Make sure we can sink address computation even
+; if there is a cycle in phi nodes.
+define void @test9(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test9
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = bitcast i64* %addr to i32*
+  br label %header
+
+header:
+  %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
+  %casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge]
+  br i1 %cond, label %if.then, label %backedge
+
+if.then:
+  call void @foo(i32 %iv)
+  %addr.1 = getelementptr inbounds i64, i64* %base, i64 5
+  %casted.1 = bitcast i64* %addr.1 to i32*
+  br label %backedge
+
+backedge:
+; CHECK-LABEL: backedge:
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+  %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
+  %v = load i32, i32* %casted.merged, align 4
+  call void @foo(i32 %v)
+  %iv.inc = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv.inc, 1000
+  br i1 %cmp, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Make sure we can eliminate a select when both arguments perform equivalent
+; address computation.
+define void @test10(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test10
+; CHECK: getelementptr inbounds i8, {{.+}} 40
+; CHECK-NOT: select
+entry:
+  %gep1 = getelementptr inbounds i64, i64* %base, i64 5
+  %gep1.casted = bitcast i64* %gep1 to i32*
+  %base.casted = bitcast i64* %base to i32*
+  %gep2 = getelementptr inbounds i32, i32* %base.casted, i64 10
+  %casted.merged = select i1 %cond, i32* %gep1.casted, i32* %gep2
+  %v = load i32, i32* %casted.merged, align 4
+  call void @foo(i32 %v)
+  ret void
+}
+
+; Found by fuzzer: getSExtValue of a > 64-bit constant.
+define void @i96_mul(i1* %base, i96 %offset) {
+BB:
+  ;; RHS = 0x7FFFFFFFFFFFFFFFFFFFFFFF
+  %B84 = mul i96 %offset, 39614081257132168796771975167
+  %G23 = getelementptr i1, i1* %base, i96 %B84
+  store i1 false, i1* %G23
+  ret void
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s -check-prefix=CHECK -check-prefix=GEP
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @load_cast_gep
+; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
+; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 40
+define void @load_cast_gep(i1 %cond, i64* %base) {
+entry:
+  %addr = getelementptr inbounds i64, i64* %base, i64 5
+  %casted = addrspacecast i64* %addr to i32 addrspace(1)*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %v = load i32, i32 addrspace(1)* %casted, align 4
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}
+
+; CHECK-LABEL: @store_gep_cast
+; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
+; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 20
+define void @store_gep_cast(i1 %cond, i64* %base) {
+entry:
+  %casted = addrspacecast i64* %base to i32 addrspace(1)*
+  %addr = getelementptr inbounds i32, i32 addrspace(1)* %casted, i64 5
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  store i32 0, i32 addrspace(1)* %addr, align 4
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/widen_switch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/widen_switch.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/widen_switch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,103 @@
+;; x86 is chosen to show the transform when 8-bit and 16-bit registers are available.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown    | FileCheck %s --check-prefix=X86
+; RUN: opt < %s -debugify -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=DEBUG
+
+; No change for x86 because 16-bit registers are part of the architecture.
+
+define i32 @widen_switch_i16(i32 %a)  {
+entry:
+  %trunc = trunc i32 %a to i16
+  switch i16 %trunc, label %sw.default [
+    i16 1, label %sw.bb0
+    i16 -1, label %sw.bb1
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+  ret i32 %retval
+
+; X86-LABEL: @widen_switch_i16(
+; X86:       %trunc = trunc i32 %a to i16
+; X86-NEXT:  switch i16 %trunc, label %sw.default [
+; X86-NEXT:    i16 1, label %sw.bb0
+; X86-NEXT:    i16 -1, label %sw.bb1
+}
+
+; Widen to 32-bit from a smaller, non-native type.
+
+define i32 @widen_switch_i17(i32 %a)  {
+entry:
+  %trunc = trunc i32 %a to i17
+  switch i17 %trunc, label %sw.default [
+    i17 10, label %sw.bb0
+    i17 -1, label %sw.bb1
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+  ret i32 %retval
+
+; X86-LABEL: @widen_switch_i17(
+; X86:       %0 = zext i17 %trunc to i32
+; X86-NEXT:  switch i32 %0, label %sw.default [
+; X86-NEXT:    i32 10, label %sw.bb0
+; X86-NEXT:    i32 131071, label %sw.bb1
+
+; DEBUG-LABEL: @widen_switch_i17(
+; DEBUG:       zext i17 %trunc to i32, !dbg [[switch_loc:![0-9]+]]
+; DEBUG-NEXT:  switch i32 {{.*}} [
+; DEBUG-NEXT:    label %sw.bb0
+; DEBUG-NEXT:    label %sw.bb1
+; DEBUG-NEXT:  ], !dbg [[switch_loc]]
+}
+
+; If the switch condition is a sign-extended function argument, then the
+; condition and cases should be sign-extended rather than zero-extended
+; because the sign-extension can be optimized away.
+
+define i32 @widen_switch_i16_sext(i2 signext %a)  {
+entry:
+  switch i2 %a, label %sw.default [
+    i2 1, label %sw.bb0
+    i2 -1, label %sw.bb1
+  ]
+
+sw.bb0:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+  ret i32 %retval
+
+; X86-LABEL: @widen_switch_i16_sext(
+; X86:       %0 = sext i2 %a to i8
+; X86-NEXT:  switch i8 %0, label %sw.default [
+; X86-NEXT:    i8 1, label %sw.bb0
+; X86-NEXT:    i8 -1, label %sw.bb1
+}
+
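A small hand-written sketch (not from the patch) of the widened form that the X86 checks in @widen_switch_i17 describe: the illegal i17 condition is zero-extended to the next legal width and every case constant is rewritten to its zero-extended value, so i17 -1 becomes i32 131071.

define i32 @widen_switch_i17_sketch(i32 %a) {
entry:
  %trunc = trunc i32 %a to i17
  %wide = zext i17 %trunc to i32
  switch i32 %wide, label %sw.default [
    i32 10, label %sw.bb0
    i32 131071, label %sw.bb1        ; zext(i17 -1) = 2^17 - 1
  ]

sw.bb0:
  br label %return

sw.bb1:
  br label %return

sw.default:
  br label %return

return:
  %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
  ret i32 %retval
}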

Added: llvm/trunk/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare -mcpu=corei7 %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2
+; RUN: opt -S -codegenprepare -mcpu=bdver2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP
+; RUN: opt -S -codegenprepare -mcpu=core-avx2 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2
+; RUN: opt -S -codegenprepare -mcpu=skylake-avx512 %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512BW
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin10.9.0"
+
+define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_8bit(
+; CHECK-NEXT:    [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if_true:
+; CHECK-NEXT:    ret <16 x i8> [[MASK]]
+; CHECK:       if_false:
+; CHECK-NEXT:    [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]]
+; CHECK-NEXT:    ret <16 x i8> [[RES]]
+;
+  %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <16 x i8> %mask
+
+if_false:
+  %res = shl <16 x i8> %lhs, %mask
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) {
+; CHECK-SSE2-LABEL: @test_16bit(
+; CHECK-SSE2-NEXT:    [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-SSE2-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2:       if_true:
+; CHECK-SSE2-NEXT:    ret <8 x i16> [[MASK]]
+; CHECK-SSE2:       if_false:
+; CHECK-SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-SSE2-NEXT:    [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT:    ret <8 x i16> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_16bit(
+; CHECK-XOP-NEXT:    [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-XOP-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP:       if_true:
+; CHECK-XOP-NEXT:    ret <8 x i16> [[MASK]]
+; CHECK-XOP:       if_false:
+; CHECK-XOP-NEXT:    [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT:    ret <8 x i16> [[RES]]
+;
+; CHECK-AVX2-LABEL: @test_16bit(
+; CHECK-AVX2-NEXT:    [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-AVX2-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX2:       if_true:
+; CHECK-AVX2-NEXT:    ret <8 x i16> [[MASK]]
+; CHECK-AVX2:       if_false:
+; CHECK-AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-AVX2-NEXT:    [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]]
+; CHECK-AVX2-NEXT:    ret <8 x i16> [[RES]]
+;
+; CHECK-AVX512BW-LABEL: @test_16bit(
+; CHECK-AVX512BW-NEXT:    [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
+; CHECK-AVX512BW-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX512BW:       if_true:
+; CHECK-AVX512BW-NEXT:    ret <8 x i16> [[MASK]]
+; CHECK-AVX512BW:       if_false:
+; CHECK-AVX512BW-NEXT:    [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[MASK]]
+; CHECK-AVX512BW-NEXT:    ret <8 x i16> [[RES]]
+;
+  %mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <8 x i16> %mask
+
+if_false:
+  %res = shl <8 x i16> %lhs, %mask
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
+; CHECK-LABEL: @test_notsplat(
+; CHECK-NEXT:    [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if_true:
+; CHECK-NEXT:    ret <4 x i32> [[MASK]]
+; CHECK:       if_false:
+; CHECK-NEXT:    [[RES:%.*]] = shl <4 x i32> [[LHS:%.*]], [[MASK]]
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <4 x i32> %mask
+
+if_false:
+  %res = shl <4 x i32> %lhs, %mask
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) {
+; CHECK-SSE2-LABEL: @test_32bit(
+; CHECK-SSE2-NEXT:    [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+; CHECK-SSE2-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2:       if_true:
+; CHECK-SSE2-NEXT:    ret <4 x i32> [[MASK]]
+; CHECK-SSE2:       if_false:
+; CHECK-SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+; CHECK-SSE2-NEXT:    [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT:    ret <4 x i32> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_32bit(
+; CHECK-XOP-NEXT:    [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+; CHECK-XOP-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP:       if_true:
+; CHECK-XOP-NEXT:    ret <4 x i32> [[MASK]]
+; CHECK-XOP:       if_false:
+; CHECK-XOP-NEXT:    [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT:    ret <4 x i32> [[RES]]
+;
+; CHECK-AVX-LABEL: @test_32bit(
+; CHECK-AVX-NEXT:    [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+; CHECK-AVX-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX:       if_true:
+; CHECK-AVX-NEXT:    ret <4 x i32> [[MASK]]
+; CHECK-AVX:       if_false:
+; CHECK-AVX-NEXT:    [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[MASK]]
+; CHECK-AVX-NEXT:    ret <4 x i32> [[RES]]
+;
+  %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <4 x i32> %mask
+
+if_false:
+  %res = ashr <4 x i32> %lhs, %mask
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) {
+; CHECK-SSE2-LABEL: @test_64bit(
+; CHECK-SSE2-NEXT:    [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-SSE2-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-SSE2:       if_true:
+; CHECK-SSE2-NEXT:    ret <2 x i64> [[MASK]]
+; CHECK-SSE2:       if_false:
+; CHECK-SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-SSE2-NEXT:    [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[TMP1]]
+; CHECK-SSE2-NEXT:    ret <2 x i64> [[RES]]
+;
+; CHECK-XOP-LABEL: @test_64bit(
+; CHECK-XOP-NEXT:    [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-XOP-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-XOP:       if_true:
+; CHECK-XOP-NEXT:    ret <2 x i64> [[MASK]]
+; CHECK-XOP:       if_false:
+; CHECK-XOP-NEXT:    [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[MASK]]
+; CHECK-XOP-NEXT:    ret <2 x i64> [[RES]]
+;
+; CHECK-AVX-LABEL: @test_64bit(
+; CHECK-AVX-NEXT:    [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-AVX-NEXT:    br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK-AVX:       if_true:
+; CHECK-AVX-NEXT:    ret <2 x i64> [[MASK]]
+; CHECK-AVX:       if_false:
+; CHECK-AVX-NEXT:    [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[MASK]]
+; CHECK-AVX-NEXT:    ret <2 x i64> [[RES]]
+;
+  %mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
+  br i1 %tst, label %if_true, label %if_false
+
+if_true:
+  ret <2 x i64> %mask
+
+if_false:
+  %res = lshr <2 x i64> %lhs, %mask
+  ret <2 x i64> %res
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/basic.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,86 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK-LABEL: @test1(
+; objectsize should fold to a constant, which causes the branch to fold to an
+; unconditional branch. Next, we fold the control flow altogether.
+; rdar://8785296
+define i32 @test1(i8* %ptr) nounwind ssp noredzone align 2 {
+entry:
+  %0 = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false, i1 false, i1 false)
+  %1 = icmp ugt i64 %0, 3
+  br i1 %1, label %T, label %trap
+
+; CHECK: entry:
+; CHECK-NOT: br label %
+
+trap:                                             ; preds = %0, %entry
+  tail call void @llvm.trap() noreturn nounwind
+  unreachable
+
+T:
+; CHECK: ret i32 4
+  ret i32 4
+}
+
+; CHECK-LABEL: @test_objectsize_null_flag(
+define i64 @test_objectsize_null_flag(i8* %ptr) {
+entry:
+  ; CHECK: ret i64 -1
+  %0 = tail call i64 @llvm.objectsize.i64(i8* null, i1 false, i1 true, i1 false)
+  ret i64 %0
+}
+
+; CHECK-LABEL: @test_objectsize_null_flag_min(
+define i64 @test_objectsize_null_flag_min(i8* %ptr) {
+entry:
+  ; CHECK: ret i64 0
+  %0 = tail call i64 @llvm.objectsize.i64(i8* null, i1 true, i1 true, i1 false)
+  ret i64 %0
+}
+
+; Test that null pointers are foldable, since we evaluate them with non-exact
+; modes in CodeGenPrepare.
+; CHECK-LABEL: @test_objectsize_null_flag_noas0(
+define i64 @test_objectsize_null_flag_noas0() {
+entry:
+  ; CHECK: ret i64 -1
+  %0 = tail call i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)* null, i1 false,
+                                               i1 true, i1 false)
+  ret i64 %0
+}
+
+; CHECK-LABEL: @test_objectsize_null_flag_min_noas0(
+define i64 @test_objectsize_null_flag_min_noas0() {
+entry:
+  ; CHECK: ret i64 0
+  %0 = tail call i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)* null, i1 true,
+                                               i1 true, i1 false)
+  ret i64 %0
+}
+
+; CHECK-LABEL: @test_objectsize_null_known_flag_noas0
+define i64 @test_objectsize_null_known_flag_noas0() {
+entry:
+  ; CHECK: ret i64 -1
+  %0 = tail call i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)* null, i1 false,
+                                               i1 false, i1 false)
+  ret i64 %0
+}
+
+; CHECK-LABEL: @test_objectsize_null_known_flag_min_noas0
+define i64 @test_objectsize_null_known_flag_min_noas0() {
+entry:
+  ; CHECK: ret i64 0
+  %0 = tail call i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)* null, i1 true,
+                                               i1 false, i1 false)
+  ret i64 %0
+}
+
+
+declare i64 @llvm.objectsize.i64(i8*, i1, i1, i1) nounwind readonly
+declare i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)*, i1, i1, i1) nounwind readonly
+
+declare void @llvm.trap() nounwind
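
For reference, here is a minimal C sketch of the kind of source that produces the @test1 pattern above (the function and buffer names are hypothetical, not taken from the test): __builtin_object_size folds to a compile-time constant, the guard becomes statically false, and both the trap call and the conditional branch can be removed.

#include <string.h>

/* Hypothetical fortified write: __builtin_object_size(buf, 0) folds to 16 at
 * compile time, so the guard below is statically false and first the trap
 * branch and then the conditional branch itself can be folded away. */
void fill16(const char *src) {
  char buf[16];
  if (__builtin_object_size(buf, 0) < sizeof(buf))
    __builtin_trap();                      /* dead once objectsize is folded */
  memcpy(buf, src, sizeof(buf));
}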

Added: llvm/trunk/test/Transforms/CodeGenPrepare/bitreverse-hang.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/bitreverse-hang.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/bitreverse-hang.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/bitreverse-hang.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-unroll -codegenprepare -S | FileCheck %s
+
+; This test is a worst-case scenario for bitreversal/byteswap detection.
+; After loop unrolling (the unrolled form is unreadably large, so the loop is kept
+; rolled here), we have a binary tree of OR operands (as bitreversal detection
+; looks straight through shifts):
+;
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;
+; This results in exponential runtime. The loop here has 32 iterations, which is
+; enough to effectively hang the compiler if we don't handle this case cleverly.
+
+ at b = common global i32 0, align 4
+
+; CHECK: define i32 @fn1
+define i32 @fn1() #0 {
+entry:
+  %b.promoted = load i32, i32* @b, align 4, !tbaa !2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %shr = lshr i32 %or4, 1
+  %or = or i32 %shr, %or4
+  %inc = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store i32 %or, i32* @b, align 4, !tbaa !2
+  ret i32 undef
+}
+
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/builtin-condition.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/builtin-condition.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/builtin-condition.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/builtin-condition.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,123 @@
+; RUN: opt -codegenprepare -S  < %s | FileCheck %s
+
+; Ensure we act sanely on overflow.
+; CHECK-LABEL: define i32 @bar
+define i32 @bar() {
+entry:
+  ; CHECK: ret i32 -1
+  %az = alloca [2147483649 x i32], align 16
+  %a = alloca i8*, align 8
+  %arraydecay = getelementptr inbounds [2147483649 x i32], [2147483649 x i32]* %az, i32 0, i32 0
+  %0 = bitcast i32* %arraydecay to i8*
+  store i8* %0, i8** %a, align 8
+  %1 = load i8*, i8** %a, align 8
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false)
+  ret i32 %2
+}
+
+; CHECK-LABEL: define i32 @baz
+define i32 @baz(i32 %n) {
+entry:
+  ; CHECK: ret i32 -1
+  %az = alloca [1 x i32], align 16
+  %bz = alloca [4294967297 x i32], align 16
+  %tobool = icmp ne i32 %n, 0
+  %arraydecay = getelementptr inbounds [1 x i32], [1 x i32]* %az, i64 0, i64 0
+  %arraydecay1 = getelementptr inbounds [4294967297 x i32], [4294967297 x i32]* %bz, i64 0, i64 0
+  %cond = select i1 %tobool, i32* %arraydecay, i32* %arraydecay1
+  %0 = bitcast i32* %cond to i8*
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
+  ret i32 %1
+}
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1)
+
+; The following tests were generated by:
+; #include <stdio.h>
+; #include <stdlib.h>
+; #define STATIC_BUF_SIZE 10
+; #define LARGER_BUF_SIZE 30
+;
+; size_t foo1(int flag) {
+;   char *cptr;
+;   char chararray[LARGER_BUF_SIZE];
+;   char chararray2[STATIC_BUF_SIZE];
+;   if(flag)
+;     cptr = chararray2;
+;    else
+;     cptr = chararray;
+;
+;   return  __builtin_object_size(cptr, 2);
+; }
+;
+; size_t foo2(int n) {
+;   char Small[10];
+;   char Large[20];
+;   char *Ptr = n ? Small : Large + 19;
+;   return __builtin_object_size(Ptr, 0);
+; }
+;
+; void foo() {
+;   size_t ret;
+;   size_t ret1;
+;   ret = foo1(0);
+;   ret1 = foo2(0);
+;   printf("\n%d %d\n", ret, ret1);
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str = private unnamed_addr constant [8 x i8] c"\0A%d %d\0A\00", align 1
+
+define i64 @foo1(i32 %flag) {
+entry:
+  %chararray = alloca [30 x i8], align 16
+  %chararray2 = alloca [10 x i8], align 1
+  %0 = getelementptr inbounds [30 x i8], [30 x i8]* %chararray, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 30, i8* %0)
+  %1 = getelementptr inbounds [10 x i8], [10 x i8]* %chararray2, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 10, i8* %1)
+  %tobool = icmp eq i32 %flag, 0
+  %cptr.0 = select i1 %tobool, i8* %0, i8* %1
+  %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cptr.0, i1 true)
+  call void @llvm.lifetime.end.p0i8(i64 10, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 30, i8* %0)
+  ret i64 %2
+; CHECK-LABEL: foo1
+; CHECK:  ret i64 10
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
+
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define i64 @foo2(i32 %n) {
+entry:
+  %Small = alloca [10 x i8], align 1
+  %Large = alloca [20 x i8], align 16
+  %0 = getelementptr inbounds [10 x i8], [10 x i8]* %Small, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 10, i8* %0)
+  %1 = getelementptr inbounds [20 x i8], [20 x i8]* %Large, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 20, i8* %1)
+  %tobool = icmp ne i32 %n, 0
+  %add.ptr = getelementptr inbounds [20 x i8], [20 x i8]* %Large, i64 0, i64 19
+  %cond = select i1 %tobool, i8* %0, i8* %add.ptr
+  %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cond, i1 false)
+  call void @llvm.lifetime.end.p0i8(i64 20, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 10, i8* %0)
+  ret i64 %2
+; CHECK-LABEL: foo2
+; CHECK:  ret i64 10
+}
+
+define void @foo() {
+entry:
+  %call = tail call i64 @foo1(i32 0)
+  %call1 = tail call i64 @foo2(i32 0)
+  %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i64 %call, i64 %call1)
+  ret void
+}
+
+declare i32 @printf(i8* nocapture readonly, ...)

Added: llvm/trunk/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; RUN: opt -S -codegenprepare %s -o - | FileCheck %s
+;
+; Ensure that we don't {crash,return a bad value} when given an alloca larger
+; than what a pointer can represent.
+
+target datalayout = "p:16:16"
+
+; CHECK-LABEL: @alloca_overflow_is_unknown(
+define i16 @alloca_overflow_is_unknown() {
+  %i = alloca i8, i32 65537
+  %j = call i16 @llvm.objectsize.i16.p0i8(i8* %i, i1 false, i1 false, i1 false)
+  ; CHECK: ret i16 -1
+  ret i16 %j
+}
+
+declare i16 @llvm.objectsize.i16.p0i8(i8*, i1, i1, i1)

Added: llvm/trunk/test/Transforms/CodeGenPrepare/dom-tree.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/dom-tree.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/dom-tree.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/dom-tree.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; RUN: opt -S -loop-unroll -codegenprepare < %s -domtree -analyze | FileCheck %s
+;
+; Checks that the dom tree is properly invalidated after an operation that will
+; invalidate it in CodeGenPrepare. If the domtree isn't properly invalidated,
+; this will likely segfault or print a badref.
+
+; CHECK-NOT: <badref>
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+define i32 @f(i32 %a) #0 {
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret i32 %or
+
+for.body:
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %b.07 = phi i32 [ 0, %entry ], [ %or, %for.body ]
+  %shr = lshr i32 %a, %i.08
+  %and = and i32 %shr, 1
+  %sub = sub nuw nsw i32 31, %i.08
+  %shl = shl i32 %and, %sub
+  %or = or i32 %shl, %b.07
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !3
+}
+
+attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 3.8.0"}
+!3 = distinct !{!3, !4}
+!4 = !{!"llvm.loop.unroll.full"}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/gep-unmerging.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/gep-unmerging.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/gep-unmerging.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/gep-unmerging.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,60 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+ at exit_addr = constant i8* blockaddress(@gep_unmerging, %exit)
+ at op1_addr = constant i8* blockaddress(@gep_unmerging, %op1)
+ at op2_addr = constant i8* blockaddress(@gep_unmerging, %op2)
+ at op3_addr = constant i8* blockaddress(@gep_unmerging, %op3)
+ at dummy = global i8 0
+
+define void @gep_unmerging(i1 %pred, i8* %p0) {
+entry:
+  %table = alloca [256 x i8*]
+  %table_0 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 0
+  %table_1 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 1
+  %table_2 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 2
+  %table_3 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 3
+  %exit_a = load i8*, i8** @exit_addr
+  %op1_a = load i8*, i8** @op1_addr
+  %op2_a = load i8*, i8** @op2_addr
+  %op3_a = load i8*, i8** @op3_addr
+  store i8* %exit_a, i8** %table_0
+  store i8* %op1_a, i8** %table_1
+  store i8* %op2_a, i8** %table_2
+  store i8* %op3_a, i8** %table_3
+  br label %indirectbr
+
+op1:
+; CHECK-LABEL: op1:
+; CHECK-NEXT: %p1_inc2 = getelementptr i8, i8* %p_postinc, i64 2
+; CHECK-NEXT: %p1_inc1 = getelementptr i8, i8* %p_postinc, i64 1
+  %p1_inc2 = getelementptr i8, i8* %p_preinc, i64 3
+  %p1_inc1 = getelementptr i8, i8* %p_preinc, i64 2
+  %a10 = load i8, i8* %p_postinc
+  %a11 = load i8, i8* %p1_inc1
+  %a12 = add i8 %a10, %a11
+  store i8 %a12, i8* @dummy
+  br i1 %pred, label %indirectbr, label %exit
+
+op2:
+; CHECK-LABEL: op2:
+; CHECK-NEXT: %p2_inc = getelementptr i8, i8* %p_postinc, i64 1
+  %p2_inc = getelementptr i8, i8* %p_preinc, i64 2
+  %a2 = load i8, i8* %p_postinc
+  store i8 %a2, i8* @dummy
+  br i1 %pred, label %indirectbr, label %exit
+
+op3:
+  br i1 %pred, label %indirectbr, label %exit
+
+indirectbr:
+  %p_preinc = phi i8* [%p0, %entry], [%p1_inc2, %op1], [%p2_inc, %op2], [%p_postinc, %op3]
+  %p_postinc = getelementptr i8, i8* %p_preinc, i64 1
+  %next_op = load i8, i8* %p_preinc
+  %p_zext = zext i8 %next_op to i64
+  %slot = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 %p_zext 
+  %target = load i8*, i8** %slot
+  indirectbr i8* %target, [label %exit, label %op1, label %op2]
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/invariant.group.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/invariant.group.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/invariant.group.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/invariant.group.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+ at tmp = global i8 0
+
+; CHECK-LABEL: define void @foo() {
+define void @foo() {
+enter:
+  ; CHECK-NOT: !invariant.group
+  ; CHECK-NOT: @llvm.launder.invariant.group.p0i8(
+  ; CHECK: %val = load i8, i8* @tmp{{$}}
+  %val = load i8, i8* @tmp, !invariant.group !0
+  %ptr = call i8* @llvm.launder.invariant.group.p0i8(i8* @tmp)
+  
+  ; CHECK: store i8 42, i8* @tmp{{$}}
+  store i8 42, i8* %ptr, !invariant.group !0
+  
+  ret void
+}
+; CHECK-LABEL: }
+
+; CHECK-LABEL: define void @foo2() {
+define void @foo2() {
+enter:
+  ; CHECK-NOT: !invariant.group
+  ; CHECK-NOT: @llvm.strip.invariant.group.p0i8(
+  ; CHECK: %val = load i8, i8* @tmp{{$}}
+  %val = load i8, i8* @tmp, !invariant.group !0
+  %ptr = call i8* @llvm.strip.invariant.group.p0i8(i8* @tmp)
+
+  ; CHECK: store i8 42, i8* @tmp{{$}}
+  store i8 42, i8* %ptr, !invariant.group !0
+
+  ret void
+}
+; CHECK-LABEL: }
+
+
+declare i8* @llvm.launder.invariant.group.p0i8(i8*)
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)
+!0 = !{}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/nonintegral.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/nonintegral.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/nonintegral.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/nonintegral.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,68 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+; RUN: opt -S -codegenprepare -addr-sink-using-gep=false < %s | FileCheck %s
+
+; This target data layout is modified to have a non-integral addrspace(1),
+; in order to verify that codegenprepare does not try to introduce illegal
+; inttoptrs.
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test_simple(i1 %cond, i64 addrspace(1)* %base) {
+; CHECK-LABEL: @test_simple
+; CHECK-NOT: inttoptr {{.*}} to i64 addrspace(1)*
+entry:
+  %addr = getelementptr inbounds i64, i64 addrspace(1)* %base, i64 5
+  %casted = bitcast i64 addrspace(1)* %addr to i32 addrspace(1)*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %v = load i32, i32 addrspace(1)* %casted, align 4
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}
+
+
+define void @test_inttoptr_base(i1 %cond, i64 %base) {
+; CHECK-LABEL: @test_inttoptr_base
+; CHECK-NOT: inttoptr {{.*}} to i64 addrspace(1)*
+entry:
+; Doing the inttoptr in the integral addrspace(0) followed by an explicit
+; (frontend-introduced) addrspacecast is fine. We cannot, however, introduce
+; a direct inttoptr to addrspace(1).
+  %baseptr = inttoptr i64 %base to i64*
+  %baseptrni = addrspacecast i64 *%baseptr to i64 addrspace(1)*
+  %addr = getelementptr inbounds i64, i64 addrspace(1)* %baseptrni, i64 5
+  %casted = bitcast i64 addrspace(1)* %addr to i32 addrspace(1)*
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %v = load i32, i32 addrspace(1)* %casted, align 4
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}
+
+define void @test_ptrtoint_base(i1 %cond, i64 addrspace(1)* %base) {
+; CHECK-LABEL: @test_ptrtoint_base
+; CHECK-NOT: ptrtoint addrspace(1)* {{.*}} to i64
+entry:
+; This one is inserted by the frontend, so it's fine. We're not allowed to
+; directly ptrtoint %base ourselves, though.
+  %baseptr0 = addrspacecast i64 addrspace(1)* %base to i64*
+  %toint = ptrtoint i64* %baseptr0 to i64
+  %added = add i64 %toint, 8
+  %toptr = inttoptr i64 %added to i64*
+  %geped = getelementptr i64, i64* %toptr, i64 2
+  br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+  %v = load i64, i64* %geped, align 4
+  br label %fallthrough
+
+fallthrough:
+  ret void
+}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,57 @@
+; RUN: opt < %s -codegenprepare -S | FileCheck %s
+
+target triple = "x86_64-pc-linux-gnu"
+
+; This tests that hot/cold functions get the correct section prefix assigned.
+
+; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; The entry is hot
+define void @hot_func() !prof !15 {
+  ret void
+}
+
+; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
+; The sum of the two call-site counts is hot
+define void @hot_call_func() !prof !16 {
+  call void @hot_func(), !prof !17
+  call void @hot_func(), !prof !17
+  ret void
+}
+
+; CHECK-NOT: normal_func{{.*}}!section_prefix
+; The sum of all call-site counts is neither hot nor cold
+define void @normal_func() !prof !16 {
+  call void @hot_func(), !prof !17
+  call void @hot_func(), !prof !18
+  call void @hot_func(), !prof !18
+  ret void
+}
+
+; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
+; The entry and the callsite are both cold
+define void @cold_func() !prof !16 {
+  call void @hot_func(), !prof !18
+  ret void
+}
+
+; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
+; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"branch_weights", i32 80}
+!18 = !{!"branch_weights", i32 1}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/section.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/section.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/section.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/section.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,84 @@
+; RUN: opt < %s -codegenprepare -S | FileCheck %s
+
+target triple = "x86_64-pc-linux-gnu"
+
+; This tests that hot/cold functions get the correct section prefix assigned.
+
+; CHECK: hot_func1{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; The entry is hot
+define void @hot_func1() !prof !15 {
+  ret void
+}
+
+; CHECK: hot_func2{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; Entry is cold but inner block is hot
+define void @hot_func2(i32 %n) !prof !16 {
+entry:
+  %n.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %n, i32* %n.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %n.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end, !prof !19
+
+for.body:
+  %2 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %2, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+; For instrumentation-based PGO, we should only look at block counts,
+; not call-site VP metadata (which can exist on value-profiled memcpy,
+; or possibly left behind after static-analysis-based devirtualization).
+; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
+define void @cold_func1() !prof !16 {
+  call void @hot_func1(), !prof !17
+  call void @hot_func1(), !prof !17
+  ret void
+}
+
+; CHECK: cold_func2{{.*}}!section_prefix ![[COLD_ID]]
+define void @cold_func2() !prof !16 {
+  call void @hot_func1(), !prof !17
+  call void @hot_func1(), !prof !18
+  call void @hot_func1(), !prof !18
+  ret void
+}
+
+; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]]
+define void @cold_func3() !prof !16 {
+  call void @hot_func1(), !prof !18
+  ret void
+}
+
+; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
+; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"branch_weights", i32 80}
+!18 = !{!"branch_weights", i32 1}
+!19 = !{!"branch_weights", i32 1000, i32 1}

Added: llvm/trunk/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,110 @@
+; REQUIRES: aarch64-registered-target
+; RUN: opt -codegenprepare -mtriple=arm64-apple-ios -S -o - %s | FileCheck %s
+
+ at first_ones = external global [65536 x i8]
+
+define i32 @fct19(i64 %arg1) #0 !dbg !6 {
+; CHECK-LABEL: @fct19
+entry:
+  %x.sroa.1.0.extract.shift = lshr i64 %arg1, 16, !dbg !35
+  %x.sroa.1.0.extract.trunc = trunc i64 %x.sroa.1.0.extract.shift to i16, !dbg !36
+
+  %x.sroa.3.0.extract.shift = lshr i64 %arg1, 32, !dbg !37
+  call void @llvm.dbg.value(metadata i64 %x.sroa.3.0.extract.shift, metadata !13, metadata !DIExpression()), !dbg !37
+; CHECK: call void @llvm.dbg.value(metadata i64 %arg1, metadata {{.*}}, metadata !DIExpression(DW_OP_constu, 32, DW_OP_shr, DW_OP_stack_value)), !dbg [[shift2_loc:![0-9]+]]
+
+  %x.sroa.5.0.extract.shift = lshr i64 %arg1, 48, !dbg !38
+  %tobool = icmp eq i64 %x.sroa.5.0.extract.shift, 0, !dbg !39
+  br i1 %tobool, label %if.end, label %if.then, !dbg !40
+
+if.then:                                          ; preds = %entry
+  %arrayidx3 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift, !dbg !41
+  %0 = load i8, i8* %arrayidx3, align 1, !dbg !42
+  %conv = zext i8 %0 to i32, !dbg !43
+  br label %return, !dbg !44
+
+if.end:                                           ; preds = %entry
+; CHECK-LABEL: if.end:
+; CHECK-NEXT: lshr i64 %arg1, 32, !dbg [[shift2_loc]]
+  %x.sroa.3.0.extract.trunc = trunc i64 %x.sroa.3.0.extract.shift to i16, !dbg !45
+  %tobool6 = icmp eq i16 %x.sroa.3.0.extract.trunc, 0, !dbg !46
+  br i1 %tobool6, label %if.end13, label %if.then7, !dbg !47
+
+if.then7:                                         ; preds = %if.end
+  %idxprom10 = and i64 %x.sroa.3.0.extract.shift, 65535, !dbg !48
+  %arrayidx11 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom10, !dbg !49
+  %1 = load i8, i8* %arrayidx11, align 1, !dbg !50
+  %conv12 = zext i8 %1 to i32, !dbg !51
+  %add = add nsw i32 %conv12, 16, !dbg !52
+  br label %return, !dbg !53
+
+if.end13:                                         ; preds = %if.end
+; CHECK-LABEL: if.end13:
+; CHECK-NEXT: [[shift1:%.*]] = lshr i64 %arg1, 16, !dbg [[shift1_loc:![0-9]+]]
+; CHECK-NEXT: trunc i64 [[shift1]] to i16, !dbg [[trunc1_loc:![0-9]+]]
+  %tobool16 = icmp eq i16 %x.sroa.1.0.extract.trunc, 0, !dbg !54
+  br i1 %tobool16, label %return, label %if.then17, !dbg !55
+
+if.then17:                                        ; preds = %if.end13
+  %idxprom20 = and i64 %x.sroa.1.0.extract.shift, 65535, !dbg !56
+  %arrayidx21 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom20, !dbg !57
+  %2 = load i8, i8* %arrayidx21, align 1, !dbg !58
+  %conv22 = zext i8 %2 to i32, !dbg !59
+  %add23 = add nsw i32 %conv22, 32, !dbg !60
+  br label %return, !dbg !61
+
+return:                                           ; preds = %if.then17, %if.end13, %if.then7, %if.then
+  %retval.0 = phi i32 [ %conv, %if.then ], [ %add, %if.then7 ], [ %add23, %if.then17 ], [ 64, %if.end13 ], !dbg !62
+  ret i32 %retval.0, !dbg !63
+}
+
+; CHECK: [[shift1_loc]] = !DILocation(line: 1
+; CHECK: [[trunc1_loc]] = !DILocation(line: 2
+; CHECK: [[shift2_loc]] = !DILocation(line: 3
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind readonly ssp }
+attributes #1 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "sink-shift-and-trunc.ll", directory: "/")
+!2 = !{}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = distinct !DISubprogram(name: "fct19", linkageName: "fct19", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)
+!7 = !DISubroutineType(types: !2)
+!8 = !{!13}
+!10 = !DIBasicType(name: "ty64", size: 64, encoding: DW_ATE_unsigned)
+!13 = !DILocalVariable(name: "3", scope: !6, file: !1, line: 3, type: !10)
+!35 = !DILocation(line: 1, column: 1, scope: !6)
+!36 = !DILocation(line: 2, column: 1, scope: !6)
+!37 = !DILocation(line: 3, column: 1, scope: !6)
+!38 = !DILocation(line: 4, column: 1, scope: !6)
+!39 = !DILocation(line: 5, column: 1, scope: !6)
+!40 = !DILocation(line: 6, column: 1, scope: !6)
+!41 = !DILocation(line: 7, column: 1, scope: !6)
+!42 = !DILocation(line: 8, column: 1, scope: !6)
+!43 = !DILocation(line: 9, column: 1, scope: !6)
+!44 = !DILocation(line: 10, column: 1, scope: !6)
+!45 = !DILocation(line: 11, column: 1, scope: !6)
+!46 = !DILocation(line: 12, column: 1, scope: !6)
+!47 = !DILocation(line: 13, column: 1, scope: !6)
+!48 = !DILocation(line: 14, column: 1, scope: !6)
+!49 = !DILocation(line: 15, column: 1, scope: !6)
+!50 = !DILocation(line: 16, column: 1, scope: !6)
+!51 = !DILocation(line: 17, column: 1, scope: !6)
+!52 = !DILocation(line: 18, column: 1, scope: !6)
+!53 = !DILocation(line: 19, column: 1, scope: !6)
+!54 = !DILocation(line: 20, column: 1, scope: !6)
+!55 = !DILocation(line: 21, column: 1, scope: !6)
+!56 = !DILocation(line: 22, column: 1, scope: !6)
+!57 = !DILocation(line: 23, column: 1, scope: !6)
+!58 = !DILocation(line: 24, column: 1, scope: !6)
+!59 = !DILocation(line: 25, column: 1, scope: !6)
+!60 = !DILocation(line: 26, column: 1, scope: !6)
+!61 = !DILocation(line: 27, column: 1, scope: !6)
+!62 = !DILocation(line: 28, column: 1, scope: !6)
+!63 = !DILocation(line: 29, column: 1, scope: !6)

Added: llvm/trunk/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,200 @@
+; RUN: opt -codegenprepare  < %s  -mtriple=aarch64-none-linux-gnu -S  | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; Expect to skip merging two empty blocks (sw.bb and sw.bb2) into sw.epilog
+; as both of them are unlikely to be executed.
+define i32 @f_switch(i32 %c)  {
+; CHECK-LABEL: @f_switch
+; CHECK-LABEL: entry:
+; CHECK: i32 10, label %sw.bb
+; CHECK: i32 20, label %sw.bb2
+entry:
+  switch i32 %c, label %sw.default [
+    i32 10, label %sw.bb
+    i32 20, label %sw.bb2
+    i32 30, label %sw.bb3
+    i32 40, label %sw.bb4
+  ], !prof !0
+
+sw.bb:                                            ; preds = %entry
+  br label %sw.epilog
+
+sw.bb2:                                           ; preds = %entry
+  br label %sw.epilog
+
+sw.bb3:                                           ; preds = %entry
+  call void bitcast (void (...)* @callcase3 to void ()*)()
+  br label %sw.epilog
+
+sw.bb4:                                           ; preds = %entry
+  call void bitcast (void (...)* @callcase4 to void ()*)()
+  br label %sw.epilog
+
+sw.default:                                       ; preds = %entry
+  call void bitcast (void (...)* @calldefault to void ()*)()
+  br label %sw.epilog
+
+; CHECK-LABEL: sw.epilog:
+; CHECK: %fp.0 = phi void (...)* [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %sw.bb ]
+sw.epilog:                                        ; preds = %sw.default, %sw.bb3, %sw.bb2, %sw.bb
+  %fp.0 = phi void (...)* [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %sw.bb ]
+  %callee.knr.cast = bitcast void (...)* %fp.0 to void ()*
+  call void %callee.knr.cast()
+  ret i32 0
+}
+
+; Expect not to merge sw.bb2 because of the conflict in the incoming value from
+; sw.bb, which is already merged.
+define i32 @f_switch2(i32 %c)  {
+; CHECK-LABEL: @f_switch2
+; CHECK-LABEL: entry:
+; CHECK: i32 10, label %sw.epilog
+; CHECK: i32 20, label %sw.bb2
+entry:
+  switch i32 %c, label %sw.default [
+    i32 10, label %sw.bb
+    i32 20, label %sw.bb2
+    i32 30, label %sw.bb3
+    i32 40, label %sw.bb4
+  ], !prof !1
+
+sw.bb:                                            ; preds = %entry
+  br label %sw.epilog
+
+sw.bb2:                                           ; preds = %entry
+  br label %sw.epilog
+
+sw.bb3:                                           ; preds = %entry
+  call void bitcast (void (...)* @callcase3 to void ()*)()
+  br label %sw.epilog
+
+sw.bb4:                                           ; preds = %entry
+  call void bitcast (void (...)* @callcase4 to void ()*)()
+  br label %sw.epilog
+
+sw.default:                                       ; preds = %entry
+  call void bitcast (void (...)* @calldefault to void ()*)()
+  br label %sw.epilog
+
+; CHECK-LABEL: sw.epilog:
+; CHECK: %fp.0 = phi void (...)* [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %entry ]
+sw.epilog:                                        ; preds = %sw.default, %sw.bb3, %sw.bb2, %sw.bb
+  %fp.0 = phi void (...)* [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F2, %sw.bb2 ], [ @F1, %sw.bb ]
+  %callee.knr.cast = bitcast void (...)* %fp.0 to void ()*
+  call void %callee.knr.cast()
+  ret i32 0
+}
+
+; Multiple empty blocks should be considered together if all incoming values
+; from them are the same.  We expect to merge both empty blocks (sw.bb and sw.bb2)
+; because the sum of their frequencies is higher than the threshold.
+define i32 @f_switch3(i32 %c)  {
+; CHECK-LABEL: @f_switch3
+; CHECK-LABEL: entry:
+; CHECK: i32 10, label %sw.epilog
+; CHECK: i32 20, label %sw.epilog
+entry:
+  switch i32 %c, label %sw.default [
+    i32 10, label %sw.bb
+    i32 20, label %sw.bb2
+    i32 30, label %sw.bb3
+    i32 40, label %sw.bb4
+  ], !prof !2
+
+sw.bb:                                            ; preds = %entry
+  br label %sw.epilog
+
+sw.bb2:                                           ; preds = %entry
+  br label %sw.epilog
+
+sw.bb3:                                           ; preds = %entry
+  call void bitcast (void (...)* @callcase3 to void ()*)()
+  br label %sw.epilog
+
+sw.bb4:                                           ; preds = %entry
+  call void bitcast (void (...)* @callcase4 to void ()*)()
+  br label %sw.epilog
+
+sw.default:                                       ; preds = %entry
+  call void bitcast (void (...)* @calldefault to void ()*)()
+  br label %sw.epilog
+
+; CHECK-LABEL: sw.epilog:
+; CHECK: %fp.0 = phi void (...)* [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F1, %entry ], [ @F1, %entry ]
+sw.epilog:                                        ; preds = %sw.default, %sw.bb3, %sw.bb2, %sw.bb
+  %fp.0 = phi void (...)* [ @FD, %sw.default ], [ @F4, %sw.bb4 ], [ @F3, %sw.bb3 ], [ @F1, %sw.bb2 ], [ @F1, %sw.bb ]
+  %callee.knr.cast = bitcast void (...)* %fp.0 to void ()*
+  call void %callee.knr.cast()
+  ret i32 0
+}
+
+declare void @F1(...) local_unnamed_addr
+declare void @F2(...) local_unnamed_addr
+declare void @F3(...) local_unnamed_addr
+declare void @F4(...) local_unnamed_addr
+declare void @FD(...) local_unnamed_addr
+declare void @callcase3(...) local_unnamed_addr
+declare void @callcase4(...) local_unnamed_addr
+declare void @calldefault(...) local_unnamed_addr
+
+!0 = !{!"branch_weights", i32 5, i32 1, i32 1,i32 5, i32 5}
+!1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1}
+!2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1}
+
+
+; This tests that BFI/BPI are created without hitting an assertion in isMergingEmptyBlockProfitable()
+; in the case where empty blocks are removed before creating BFI/BPI.
+ at b = common global i32 0, align 4
+ at a = common global i32* null, align 8
+define i32 @should_not_assert(i32 %i) local_unnamed_addr {
+entry:
+  %0 = load i32, i32* @b, align 4
+  %cond = icmp eq i32 %0, 6
+  br i1 %cond, label %while.cond.preheader, label %sw.epilog
+
+while.cond.preheader:                             ; preds = %entry
+  %1 = load i32*, i32** @a, align 8
+  %magicptr = ptrtoint i32* %1 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %1, i64 1
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond.preheader, %land.rhs
+  switch i64 %magicptr, label %land.rhs [
+    i64 32, label %while.cond2.loopexit
+    i64 0, label %while.cond2.loopexit
+  ]
+
+land.rhs:                                         ; preds = %while.cond
+  %2 = load i32, i32* %arrayidx, align 4
+  %tobool1 = icmp eq i32 %2, 0
+  br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond
+
+while.cond2thread-pre-split.loopexit:             ; preds = %land.rhs
+  br label %while.cond2thread-pre-split
+
+while.cond2thread-pre-split:                      ; preds = %while.cond2thread-pre-split.loopexit, %while.body4
+  %.pr = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ]
+  br label %while.cond2
+
+while.cond2.loopexit:                             ; preds = %while.cond, %while.cond
+  br label %while.cond2
+
+while.cond2:                                      ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split
+  %3 = phi i32* [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ]
+  %tobool3 = icmp eq i32* %3, null
+  br i1 %tobool3, label %sw.epilog, label %while.body4
+
+while.body4:                                      ; preds = %while.cond2
+  tail call void bitcast (void (...)* @fn2 to void ()*)()
+  %.pr.pre = load i32*, i32** @a, align 8
+  br label %while.cond2thread-pre-split
+
+sw.epilog:                                        ; preds = %while.cond2, %entry
+  ret i32 undef
+}
+
+
+declare void @fn2(...) local_unnamed_addr
+
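
For the empty-block-merging tests above, this is roughly the C shape behind @f_switch (a sketch with assumed names; F1..FD and the callcase helpers mirror the declarations in the test). The cases for 10 and 20 only select a function pointer, so whether their empty blocks are merged into sw.epilog depends entirely on how hot the profile says they are.

extern void F1(void), F2(void), F3(void), F4(void), FD(void);
extern void callcase3(void), callcase4(void), calldefault(void);

/* Cases 10 and 20 are "empty": they only pick a function pointer, so their
 * blocks are merge candidates; cases 30/40 and the default do real work. */
void dispatch(int c) {
  void (*fp)(void);
  switch (c) {
  case 10: fp = F1; break;
  case 20: fp = F2; break;
  case 30: callcase3(); fp = F3; break;
  case 40: callcase4(); fp = F4; break;
  default: calldefault(); fp = FD; break;
  }
  fp();
}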

Added: llvm/trunk/test/Transforms/CodeGenPrepare/split-indirect-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/split-indirect-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/split-indirect-loop.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/split-indirect-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+; Test that an invalid CFG is not created by the splitIndirectCriticalEdges
+; transformation when the 'target' block is a loop to itself.
+
+; CHECK: .split:
+; CHECK: br label %while.body.clone
+; CHECK: if.else1:
+; CHECK: indirectbr
+; CHECK: while.body.clone:
+; CHECK: br label %.split
+
+define void @test() {
+entry:
+  br label %if.else
+
+if.else:
+  br i1 undef, label %while.body, label %preheader
+
+preheader:
+  br label %if.else1
+
+if.then:
+  unreachable
+
+while.body:
+  %dest.sroa = phi i32 [ %1, %while.body ], [ undef, %if.else1 ], [ undef, %if.else ]
+  %0 = inttoptr i32 %dest.sroa to i8*
+  %incdec.ptr = getelementptr inbounds i8, i8* %0, i32 -1
+  %1 = ptrtoint i8* %incdec.ptr to i32
+  store i8 undef, i8* %incdec.ptr, align 1
+  br label %while.body
+
+if.else1:
+  indirectbr i8* undef, [label %if.then, label %while.body, label %if.else, label %if.else1]
+}
+

Added: llvm/trunk/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/statepoint-relocate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/statepoint-relocate.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/statepoint-relocate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,149 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare zeroext i1 @return_i1()
+
+define i32 @test_sor_basic(i32* %base) gc "statepoint-example" {
+; CHECK: getelementptr i32, i32* %base, i32 15
+; CHECK: getelementptr i32, i32* %base-new, i32 15
+entry:
+       %ptr = getelementptr i32, i32* %base, i32 15
+       %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+       %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+       %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       %ret = load i32, i32* %ptr-new
+       ret i32 %ret
+}
+
+define i32 @test_sor_two_derived(i32* %base) gc "statepoint-example" {
+; CHECK: getelementptr i32, i32* %base, i32 15
+; CHECK: getelementptr i32, i32* %base, i32 12
+; CHECK: getelementptr i32, i32* %base-new, i32 12
+; CHECK: getelementptr i32, i32* %base-new, i32 15
+entry:
+       %ptr = getelementptr i32, i32* %base, i32 15
+       %ptr2 = getelementptr i32, i32* %base, i32 12
+       %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+       %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+       %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
+       %ret = load i32, i32* %ptr-new
+       ret i32 %ret
+}
+
+define i32 @test_sor_ooo(i32* %base) gc "statepoint-example" {
+; CHECK: getelementptr i32, i32* %base, i32 15
+; CHECK: getelementptr i32, i32* %base-new, i32 15
+entry:
+       %ptr = getelementptr i32, i32* %base, i32 15
+       %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+       %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+       %ret = load i32, i32* %ptr-new
+       ret i32 %ret
+}
+
+define i32 @test_sor_gep_smallint([3 x i32]* %base) gc "statepoint-example" {
+; CHECK: getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 2
+; CHECK: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 2
+entry:
+       %ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 2
+       %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
+       %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token %tok, i32 7, i32 7)
+       %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       %ret = load i32, i32* %ptr-new
+       ret i32 %ret
+}
+
+define i32 @test_sor_gep_largeint([3 x i32]* %base) gc "statepoint-example" {
+; CHECK: getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 21
+; CHECK-NOT: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 21
+entry:
+       %ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 21
+       %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
+       %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token %tok, i32 7, i32 7)
+       %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       %ret = load i32, i32* %ptr-new
+       ret i32 %ret
+}
+
+define i32 @test_sor_noop(i32* %base) gc "statepoint-example" {
+; CHECK: getelementptr i32, i32* %base, i32 15
+; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
+entry:
+       %ptr = getelementptr i32, i32* %base, i32 15
+       %ptr2 = getelementptr i32, i32* %base, i32 12
+       %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+       %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
+       %ret = load i32, i32* %ptr-new
+       ret i32 %ret
+}
+
+define i32 @test_sor_basic_wrong_order(i32* %base) gc "statepoint-example" {
+; CHECK-LABEL: @test_sor_basic_wrong_order
+; Here the base relocate is inserted after the derived one. Make sure that we
+; don't produce uses of the relocated base pointer before its definition.
+entry:
+       %ptr = getelementptr i32, i32* %base, i32 15
+       ; CHECK: getelementptr i32, i32* %base, i32 15
+       %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+       %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+       ; CHECK: %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+       ; CHECK-NEXT: getelementptr i32, i32* %base-new, i32 15
+       %ret = load i32, i32* %ptr-new
+       ret i32 %ret
+}
+
+define i32 @test_sor_noop_cross_bb(i1 %external-cond, i32* %base) gc "statepoint-example" {
+; CHECK-LABEL: @test_sor_noop_cross_bb
+; Here the base relocate doesn't dominate the derived relocate. Make sure that
+; we don't produce an undefined use of the relocated base pointer.
+entry:
+       %ptr = getelementptr i32, i32* %base, i32 15
+       ; CHECK: getelementptr i32, i32* %base, i32 15
+       %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+       br i1 %external-cond, label %left, label %right
+
+left:
+       %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       ; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       %ret-new = load i32, i32* %ptr-new
+       ret i32 %ret-new
+
+right:
+       %ptr-base = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+       ; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+       %ret-base = load i32, i32* %ptr-base
+       ret i32 %ret-base
+}
+
+define i32 @test_sor_noop_same_bb(i1 %external-cond, i32* %base) gc "statepoint-example" {
+; CHECK-LABEL: @test_sor_noop_same_bb
+; Here the base relocate doesn't dominate the derived relocate. Make sure that
+; we don't produce an undefined use of the relocated base pointer.
+entry:
+       %ptr1 = getelementptr i32, i32* %base, i32 15
+       ; CHECK: getelementptr i32, i32* %base, i32 15
+       %ptr2 = getelementptr i32, i32* %base, i32 5
+       ; CHECK: getelementptr i32, i32* %base, i32 5
+       %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr1, i32* %ptr2)
+       ; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+       %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
+       %ret2-new = load i32, i32* %ptr2-new
+       ; CHECK: getelementptr i32, i32* %base-new, i32 5
+       %ptr1-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+       %ret1-new = load i32, i32* %ptr1-new
+       ; CHECK: getelementptr i32, i32* %base-new, i32 15
+       %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+       %ret-new = add i32 %ret2-new, %ret1-new
+       ret i32 %ret-new
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.relocate.p0i32(token, i32, i32)
+declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token, i32, i32)

Added: llvm/trunk/test/Transforms/CodeGenPrepare/widenable-condition.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/widenable-condition.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/widenable-condition.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/widenable-condition.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+; Check the idiomatic guard pattern to ensure it's lowered correctly.
+define void @test_guard(i1 %cond_0) {
+; CHECK-LABEL: @test_guard(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND_0:%.*]], label [[GUARDED:%.*]], label [[DEOPT:%.*]]
+; CHECK:       deopt:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    ret void
+; CHECK:       guarded:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+  %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+  br i1 %exiplicit_guard_cond, label %guarded, label %deopt
+
+deopt:                                            ; preds = %entry
+  call void @foo()
+  ret void
+
+guarded:
+  ret void
+}
+
+;; Test a non-guard fastpath/slowpath case
+define void @test_triangle(i1 %cond_0) {
+; CHECK-LABEL: @test_triangle(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND_0:%.*]], label [[FASTPATH:%.*]], label [[SLOWPATH:%.*]]
+; CHECK:       fastpath:
+; CHECK-NEXT:    call void @bar()
+; CHECK-NEXT:    br label [[MERGE:%.*]]
+; CHECK:       slowpath:
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+  %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+  br i1 %exiplicit_guard_cond, label %fastpath, label %slowpath
+
+fastpath:
+  call void @bar()
+  br label %merge
+
+slowpath:
+  call void @foo()
+  br label %merge
+
+merge:
+  ret void
+}
+
+
+; Demonstrate that resulting CFG simplifications are made
+define void @test_cfg_simplify() {
+; CHECK-LABEL: @test_cfg_simplify(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %widenable_cond3 = call i1 @llvm.experimental.widenable.condition()
+  br i1 %widenable_cond3, label %guarded2, label %deopt3
+
+deopt3:
+  call void @foo()
+  ret void
+
+guarded2:
+  %widenable_cond4 = call i1 @llvm.experimental.widenable.condition()
+  br i1 %widenable_cond4, label %merge1, label %slowpath1
+
+slowpath1:
+  call void @foo()
+  br label %merge1
+
+merge1:
+  ret void
+}
+
+
+declare void @foo()
+declare void @bar()
+
+; Function Attrs: inaccessiblememonly nounwind
+declare i1 @llvm.experimental.widenable.condition() #0
+
+attributes #0 = { inaccessiblememonly nounwind }
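
Conceptually, the guard idiom lowered above looks like the following C sketch; widenable() and deopt() are hypothetical stand-ins for llvm.experimental.widenable.condition and the deoptimization path. CodeGenPrepare rewrites the widenable placeholder to true, so only the branch on the real condition survives, as the CHECK lines show.

extern int widenable(void);  /* stand-in for llvm.experimental.widenable.condition */
extern void deopt(void);     /* stand-in for the deoptimization path */

/* Guard idiom: the real condition AND'ed with a widenable placeholder that
 * later passes may strengthen; folding the placeholder to 1 leaves a plain
 * branch on cond_0. */
void guard(int cond_0) {
  if (!(cond_0 && widenable()))
    deopt();
}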

Added: llvm/trunk/test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; Make sure that the constant propagator doesn't divide by zero!
+;
+; RUN: opt < %s -constprop
+;
+
+define i32 @test() {
+        %R = sdiv i32 12, 0             ; <i32> [#uses=1]
+        ret i32 %R
+}
+
+define i32 @test2() {
+        %R = srem i32 12, 0             ; <i32> [#uses=1]
+        ret i32 %R
+}
+
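
The hazard here (and in 2003-05-12-DivideError.ll below) is that a naive folder would evaluate the division natively on the host and trap itself. The following is a hedged C sketch of the distinction, not LLVM's actual folding code; the function names are made up.

#include <limits.h>

/* Naive folding: computes the quotient directly, so the folder itself traps
 * (SIGFPE on x86) for a zero divisor or for INT_MIN / -1. */
int naive_fold_sdiv(int lhs, int rhs) {
  return lhs / rhs;
}

/* Safe folding: detect both hazardous cases and refuse to fold, leaving the
 * sdiv/srem instruction alone. */
int safe_fold_sdiv(int lhs, int rhs, int *did_fold) {
  if (rhs == 0 || (lhs == INT_MIN && rhs == -1)) {
    *did_fold = 0;
    return 0;
  }
  *did_fold = 1;
  return lhs / rhs;
}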

Added: llvm/trunk/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2002-05-03-NotOperator.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2002-05-03-NotOperator.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2002-05-03-NotOperator.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,19 @@
+; This bug has to do with the fact that constant propagation was implemented in
+; terms of _logical_ not (! in C) instead of _bitwise_ not (~ in C).  This was
+; due to a spec change.
+
+; Fix #2: The unary not instruction now no longer exists. Change to xor.
+
+; RUN: opt < %s -constprop -S | \
+; RUN:   not grep "i32 0"
+
+define i32 @test1() {
+        %R = xor i32 123, -1            ; <i32> [#uses=1]
+        ret i32 %R
+}
+
+define i32 @test2() {
+        %R = xor i32 -123, -1           ; <i32> [#uses=1]
+        ret i32 %R
+}
+
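
A small C illustration of the distinction the comment above describes (an assertion-style sketch, assuming a hosted environment): folding xor-with-all-ones must behave like the bitwise ~ operator, never like the logical ! operator.

#include <assert.h>

int main(void) {
  /* xor with -1 (all bits set) flips every bit: bitwise not. */
  assert((123 ^ -1) == ~123);   /* both evaluate to -124 */
  /* logical not collapses to 0 or 1, which would be the wrong folding. */
  assert(!123 == 0);
  return 0;
}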

Added: llvm/trunk/test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; SetCC on boolean values was not implemented!
+
+; RUN: opt < %s -constprop -die -S | \
+; RUN:   not grep set
+
+define i1 @test1() {
+        %A = icmp ule i1 true, false            ; <i1> [#uses=1]
+        %B = icmp uge i1 true, false            ; <i1> [#uses=1]
+        %C = icmp ult i1 false, true            ; <i1> [#uses=1]
+        %D = icmp ugt i1 true, false            ; <i1> [#uses=1]
+        %E = icmp eq i1 false, false            ; <i1> [#uses=1]
+        %F = icmp ne i1 false, true             ; <i1> [#uses=1]
+        %G = and i1 %A, %B              ; <i1> [#uses=1]
+        %H = and i1 %C, %D              ; <i1> [#uses=1]
+        %I = and i1 %E, %F              ; <i1> [#uses=1]
+        %J = and i1 %G, %H              ; <i1> [#uses=1]
+        %K = and i1 %I, %J              ; <i1> [#uses=1]
+        ret i1 %K
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/2003-05-12-DivideError.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2003-05-12-DivideError.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2003-05-12-DivideError.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2003-05-12-DivideError.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; Make sure that the constant propagator doesn't cause a SIGFPE
+;
+; RUN: opt < %s -constprop
+;
+
+define i32 @test() {
+        %R = sdiv i32 -2147483648, -1           ; <i32> [#uses=1]
+        ret i32 %R
+}
+
+define i32 @test2() {
+        %R = srem i32 -2147483648, -1           ; <i32> [#uses=1]
+        ret i32 %R
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,10 @@
+; RUN: opt < %s -constprop -S | \
+; RUN:    not grep "ret i1 false"
+
+ at b = external global [2 x {  }]         ; <[2 x {  }]*> [#uses=2]
+
+define i1 @f() {
+        %tmp.2 = icmp eq {  }* getelementptr ([2 x {  }], [2 x {  }]* @b, i32 0, i32 0), getelementptr ([2 x {  }], [2 x {  }]* @b, i32 0, i32 1)                ; <i1> [#uses=1]
+        ret i1 %tmp.2
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/2006-11-30-vector-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2006-11-30-vector-cast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2006-11-30-vector-cast.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2006-11-30-vector-cast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,10 @@
+; RUN: opt < %s -constprop -S | \
+; RUN:   grep "i32 -1"
+; RUN: opt < %s -constprop -S | \
+; RUN:   not grep zeroinitializer
+
+define <4 x i32> @test() {
+        %tmp40 = bitcast <2 x i64> bitcast (<4 x i32> < i32 0, i32 0, i32 -1, i32 0 > to <2 x i64>) to <4 x i32>; <<4 x i32>> [#uses=1]
+        ret <4 x i32> %tmp40
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,7 @@
+; RUN: opt < %s -instcombine -S | \
+; RUN:   grep "ret i1 false"
+define i1 @test() {
+        %X = trunc i32 320 to i1                ; <i1> [#uses=1]
+        ret i1 %X
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/2006-12-01-bool-casts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2006-12-01-bool-casts.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2006-12-01-bool-casts.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2006-12-01-bool-casts.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt < %s -constprop -S | \
+; RUN:    grep "ret i32 -1"
+; RUN: opt < %s -constprop -S | \
+; RUN:    grep "ret i32 1"
+
+define i32 @test1() {
+        %A = sext i1 true to i32                ; <i32> [#uses=1]
+        ret i32 %A
+}
+
+define i32 @test2() {
+        %A = zext i1 true to i32                ; <i32> [#uses=1]
+        ret i32 %A
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/2007-02-05-BitCast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2007-02-05-BitCast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2007-02-05-BitCast.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2007-02-05-BitCast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,7 @@
+; RUN: opt < %s -constprop -S | grep 1065353216
+
+define i32 @test() {
+        %A = bitcast float 1.000000e+00 to i32          ; <i32> [#uses=1]
+        ret i32 %A
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/2007-02-23-sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2007-02-23-sdiv.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2007-02-23-sdiv.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2007-02-23-sdiv.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,5 @@
+; RUN: llvm-as < %s | llvm-dis | grep "global i32 0"
+; PR1215
+
+ at G = global i32 sdiv (i32 0, i32 -1)
+

Added: llvm/trunk/test/Transforms/ConstProp/2008-07-07-VectorCompare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2008-07-07-VectorCompare.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2008-07-07-VectorCompare.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2008-07-07-VectorCompare.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -constprop -disable-output
+; PR2529
+define <4 x i1> @test1(i32 %argc, i8** %argv) {
+entry:  
+        %foo = icmp slt <4 x i32> undef, <i32 14, i32 undef, i32 undef, i32 undef>
+        ret <4 x i1> %foo
+}
+
+define <4 x i1> @test2(i32 %argc, i8** %argv) {
+entry:  
+        %foo = icmp slt <4 x i32> <i32 undef, i32 undef, i32 undef, i32
+undef>, <i32 undef, i32 undef, i32 undef, i32 undef>
+        ret <4 x i1> %foo
+}
+
+
+define <4 x i1> @test3() {
+       %foo = fcmp ueq <4 x float> <float 0.0, float 0.0, float 0.0, float
+undef>, <float 1.0, float 1.0, float 1.0, float undef>
+	ret <4 x i1> %foo
+}
+
+define <4 x i1> @test4() {
+	%foo = fcmp ueq <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <float 1.0, float 1.0, float 1.0, float 0.0>
+
+	ret <4 x i1> %foo
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/2009-06-20-constexpr-zero-lhs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2009-06-20-constexpr-zero-lhs.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2009-06-20-constexpr-zero-lhs.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2009-06-20-constexpr-zero-lhs.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llvm-dis | not grep ptrtoint
+; PR4424
+ at G = external global i32
+ at test1 = constant i32 sdiv (i32 0, i32 ptrtoint (i32* @G to i32))
+ at test2 = constant i32 udiv (i32 0, i32 ptrtoint (i32* @G to i32))
+ at test3 = constant i32 srem (i32 0, i32 ptrtoint (i32* @G to i32))
+ at test4 = constant i32 urem (i32 0, i32 ptrtoint (i32* @G to i32))
+ at test5 = constant i32 lshr (i32 0, i32 ptrtoint (i32* @G to i32))
+ at test6 = constant i32 ashr (i32 0, i32 ptrtoint (i32* @G to i32))
+ at test7 = constant i32 shl (i32 0, i32 ptrtoint (i32* @G to i32))
+

Added: llvm/trunk/test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt < %s -constprop | llvm-dis
+; PR4848
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%0 = type { %struct.anon }
+%1 = type { %0, %2, [24 x i8] }
+%2 = type <{ %3, %3 }>
+%3 = type { %struct.hrtimer_cpu_base*, i32, %struct.rb_root, %struct.rb_node*, %struct.pgprot, i64 ()*, [16 x i8] }
+%struct.anon = type { }
+%struct.hrtimer_clock_base = type { %struct.hrtimer_cpu_base*, i32, %struct.rb_root, %struct.rb_node*, %struct.pgprot, i64 ()*, %struct.pgprot, %struct.pgprot }
+%struct.hrtimer_cpu_base = type { %0, [2 x %struct.hrtimer_clock_base], %struct.pgprot, i32, i64 }
+%struct.pgprot = type { i64 }
+%struct.rb_node = type { i64, %struct.rb_node*, %struct.rb_node* }
+%struct.rb_root = type { %struct.rb_node* }
+
+ at per_cpu__hrtimer_bases = external global %1, align 8 ; <%1*> [#uses=1]
+
+define void @init_hrtimers_cpu(i32 %cpu) nounwind noredzone section ".cpuinit.text" {
+entry:
+  %tmp3 = getelementptr %struct.hrtimer_cpu_base, %struct.hrtimer_cpu_base* bitcast (%1* @per_cpu__hrtimer_bases to %struct.hrtimer_cpu_base*), i32 0, i32 0 ; <%0*> [#uses=1]
+  %tmp5 = bitcast %0* %tmp3 to i8*                ; <i8*> [#uses=0]
+  unreachable
+}

Added: llvm/trunk/test/Transforms/ConstProp/InsertElement.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/InsertElement.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/InsertElement.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/InsertElement.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+; CHECK-LABEL: @test1
+define i32 @test1() {
+  %A = bitcast i32 2139171423 to float
+  %B = insertelement <1 x float> undef, float %A, i32 0
+  %C = extractelement <1 x float> %B, i32 0
+  %D = bitcast float %C to i32
+  ret i32 %D
+; CHECK: ret i32 2139171423
+}
+
+; CHECK-LABEL: @insertelement
+define <4 x i64> @insertelement() {
+  %vec1 = insertelement <4 x i64> undef, i64 -1, i32 0
+  %vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1
+  %vec3 = insertelement <4 x i64> %vec2, i64 -3, i32 2
+  %vec4 = insertelement <4 x i64> %vec3, i64 -4, i32 3
+  ; CHECK: ret <4 x i64> <i64 -1, i64 -2, i64 -3, i64 -4>
+  ret <4 x i64> %vec4
+}
+
+; CHECK-LABEL: @insertelement_undef
+define <4 x i64> @insertelement_undef() {
+  %vec1 = insertelement <4 x i64> undef, i64 -1, i32 0
+  %vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1
+  %vec3 = insertelement <4 x i64> %vec2, i64 -3, i32 2
+  %vec4 = insertelement <4 x i64> %vec3, i64 -4, i32 3
+  %vec5 = insertelement <4 x i64> %vec3, i64 -5, i32 4
+  ; CHECK: ret <4 x i64> undef
+  ret <4 x i64> %vec5
+}

Added: llvm/trunk/test/Transforms/ConstProp/avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/avx512.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/avx512.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/avx512.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,490 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+; REQUIRES: x86-registered-target
+
+define i1 @test_avx512_cvts_exact() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvts_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> <float 3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> <float 3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> <double 7.0, double undef>, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> <double 7.0, double undef>, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 10
+  %cmp13 = icmp eq i64 %sum13, 10
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvts_exact_max() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvts_exact_max(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> <double 2147483647.0, double undef>, i32 4) nounwind
+  %b = icmp eq i32 %i0, 2147483647
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvts_exact_max_p1() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvts_exact_max_p1(
+; CHECK: call
+entry:
+  %i0 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> <double 2147483648.0, double undef>, i32 4) nounwind
+  %b = icmp eq i32 %i0, 2147483648
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvts_exact_neg_max() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvts_exact_neg_max(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> <double -2147483648.0, double undef>, i32 4) nounwind
+  %b = icmp eq i32 %i0, -2147483648
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvts_exact_neg_max_p1() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvts_exact_neg_max_p1(
+; CHECK: call
+entry:
+  %i0 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> <double -2147483649.0, double undef>, i32 4) nounwind
+  %b = icmp eq i32 %i0, -2147483649
+  ret i1 %b
+}
+
+; Inexact values should not fold as they are dependent on rounding mode
+define i1 @test_avx512_cvts_inexact() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvts_inexact(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> <float 1.75, float undef, float undef, float undef>, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> <double 1.75, double undef>, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> <double 1.75, double undef>, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 4
+  %cmp13 = icmp eq i64 %sum13, 4
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_avx512_cvts_max() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvts_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_avx512_cvts_inf() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvts_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_avx512_cvts_nan() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvts_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvtts_exact() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtts_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> <float 3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> <float 3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> <double 7.0, double undef>, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> <double 7.0, double undef>, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 10
+  %cmp13 = icmp eq i64 %sum13, 10
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvtts_inexact() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtts_inexact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> <float 1.75, float undef, float undef, float undef>, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> <double 1.75, double undef>, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> <double 1.75, double undef>, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 2
+  %cmp13 = icmp eq i64 %sum13, 2
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_avx512_cvtts_max() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtts_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_avx512_cvtts_inf() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtts_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_avx512_cvtts_nan() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtts_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvtu_exact() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtu_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> <float 3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> <float 3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> <double 7.0, double undef>, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> <double 7.0, double undef>, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 10
+  %cmp13 = icmp eq i64 %sum13, 10
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; Negative values should not fold as they can't be represented in an unsigned int.
+define i1 @test_avx512_cvtu_neg() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtu_neg(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> <float -3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> <float -3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> <double -7.0, double undef>, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> <double -7.0, double undef>, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, -10
+  %cmp13 = icmp eq i64 %sum13, -10
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvtu_exact_max() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtu_exact_max(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> <double 4294967295.0, double undef>, i32 4) nounwind
+  %b = icmp eq i32 %i0, 4294967295
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvtu_exact_max_p1() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtu_exact_max_p1(
+; CHECK: call
+entry:
+  %i0 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> <double 4294967296.0, double undef>, i32 4) nounwind
+  %b = icmp eq i32 %i0, 4294967296
+  ret i1 %b
+}
+
+; Inexact values should not fold as they are dependent on rounding mode
+define i1 @test_avx512_cvtu_inexact() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtu_inexact(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> <float 1.75, float undef, float undef, float undef>, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> <float 1.75, float undef, float undef, float undef>, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> <double 1.75, double undef>, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> <double 1.75, double undef>, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 4
+  %cmp13 = icmp eq i64 %sum13, 4
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_avx512_cvtu_max() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtu_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_avx512_cvtu_inf() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtu_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_avx512_cvtu_nan() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvtu_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvttu_exact() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvttu_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> <float 3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> <float 3.0, float undef, float undef, float undef>, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> <double 7.0, double undef>, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> <double 7.0, double undef>, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 10
+  %cmp13 = icmp eq i64 %sum13, 10
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+define i1 @test_avx512_cvttu_inexact() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvttu_inexact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> <float 1.75, float undef, float undef, float undef>, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> <float 1.75, float undef, float undef, float undef>, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> <double 1.75, double undef>, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> <double 1.75, double undef>, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 2
+  %cmp13 = icmp eq i64 %sum13, 2
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_avx512_cvttu_max() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvttu_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_avx512_cvttu_inf() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvttu_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_avx512_cvttu_nan() nounwind readnone {
+; CHECK-LABEL: @test_avx512_cvttu_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %fm, i32 4) nounwind
+  %i1 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %fm, i32 4) nounwind
+  %i2 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %dm, i32 4) nounwind
+  %i3 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %dm, i32 4) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone
+declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone

Added: llvm/trunk/test/Transforms/ConstProp/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/basictest.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/basictest.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/basictest.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+; RUN: opt < %s -constprop -die -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
+; This is a basic sanity check for constant propagation.  The add instruction
+; should be eliminated.
+define i32 @test1(i1 %B) {
+        br i1 %B, label %BB1, label %BB2
+
+BB1:      
+        %Val = add i32 0, 0
+        br label %BB3
+
+BB2:      
+        br label %BB3
+
+BB3:     
+; CHECK-LABEL: @test1(
+; CHECK: %Ret = phi i32 [ 0, %BB1 ], [ 1, %BB2 ]
+        %Ret = phi i32 [ %Val, %BB1 ], [ 1, %BB2 ] 
+        ret i32 %Ret
+}
+
+
+; PR6197
+define i1 @test2(i8* %f) nounwind {
+entry:
+  %V = icmp ne i8* blockaddress(@test2, %bb), null
+  br label %bb
+bb:
+  ret i1 %V
+  
+; CHECK-LABEL: @test2(
+; CHECK: ret i1 true
+}
+
+define i1 @TNAN() {
+; CHECK-LABEL: @TNAN(
+; CHECK: ret i1 true
+  %A = fcmp uno double 0x7FF8000000000000, 1.000000e+00
+  %B = fcmp uno double 1.230000e+02, 1.000000e+00
+  %C = or i1 %A, %B
+  ret i1 %C
+}
+
+define i128 @vector_to_int_cast() {
+  %A = bitcast <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824> to i128
+  ret i128 %A
+; CHECK-LABEL: @vector_to_int_cast(
+; CHECK: ret i128 85070591750041656499021422275829170176
+}
+  

Added: llvm/trunk/test/Transforms/ConstProp/bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/bitcast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/bitcast.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/bitcast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+; PR2165
+
+define <1 x i64> @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret <1 x i64> <i64 63>
+;
+  %A = bitcast i64 63 to <1 x i64>
+  ret <1 x i64> %A
+}
+
+; Ensure that a FP source operand isn't propagated to an icmp.
+
+ at a = external global i16, align 1
+ at b = external global i16, align 1
+
+define i1 @bad_icmp_constexpr_bitcast() {
+; CHECK-LABEL: @bad_icmp_constexpr_bitcast(
+; CHECK-NEXT:    ret i1 icmp eq (i32 ptrtoint (i16* @a to i32), i32 bitcast (float fadd (float bitcast (i32 ptrtoint (i16* @b to i32) to float), float 2.000000e+00) to i32))
+;
+  %cmp = icmp eq i32 ptrtoint (i16* @a to i32), bitcast (float fadd (float bitcast (i32 ptrtoint (i16* @b to i32) to float), float 2.0) to i32)
+  ret i1 %cmp
+}
+
+; Ensure that an integer source operand isn't propagated to a fcmp.
+
+ at c = external global i16, align 1
+ at d = external global i16, align 1
+
+define i1 @bad_fcmp_constexpr_bitcast() {
+; CHECK-LABEL: @bad_fcmp_constexpr_bitcast(
+; CHECK-NEXT:    ret i1 fcmp oeq (float bitcast (i32 ptrtoint (i16* @c to i32) to float), float bitcast (i32 add (i32 ptrtoint (i16* @d to i32), i32 2) to float))
+;
+  %cmp = fcmp oeq float bitcast (i32 ptrtoint (i16* @c to i32) to float), bitcast (i32 add (i32 ptrtoint (i16* @d to i32), i32 2) to float)
+  ret i1 %cmp
+}
+
+; Ensure that an "ordered and equal" fcmp of a ConstantExpr to itself is not folded, since the ConstantExpr may be a NaN.
+
+define i1 @fcmp_constexpr_oeq(float %conv) {
+; CHECK-LABEL: @fcmp_constexpr_oeq(
+; CHECK-NEXT:    ret i1 fcmp oeq (float bitcast (i32 ptrtoint (i16* @a to i32) to float), float bitcast (i32 ptrtoint (i16* @a to i32) to float))
+;
+  %cmp = fcmp oeq float bitcast (i32 ptrtoint (i16* @a to i32) to float), bitcast (i32 ptrtoint (i16* @a to i32) to float)
+  ret i1 %cmp
+}
+
+; Ensure that an "unordered or not equal" fcmp of a ConstantExpr to itself is not folded, since the ConstantExpr may be a NaN.
+
+define i1 @fcmp_constexpr_une(float %conv) {
+; CHECK-LABEL: @fcmp_constexpr_une(
+; CHECK-NEXT:    ret i1 fcmp une (float bitcast (i32 ptrtoint (i16* @a to i32) to float), float bitcast (i32 ptrtoint (i16* @a to i32) to float))
+;
+  %cmp = fcmp une float bitcast (i32 ptrtoint (i16* @a to i32) to float), bitcast (i32 ptrtoint (i16* @a to i32) to float)
+  ret i1 %cmp
+}
+
+define i1 @fcmp_constexpr_ueq(float %conv) {
+; CHECK-LABEL: @fcmp_constexpr_ueq(
+; CHECK-NEXT:    ret i1 true
+;
+  %cmp = fcmp ueq float bitcast (i32 ptrtoint (i16* @a to i32) to float), bitcast (i32 ptrtoint (i16* @a to i32) to float)
+  ret i1 %cmp
+}
+
+define i1 @fcmp_constexpr_one(float %conv) {
+; CHECK-LABEL: @fcmp_constexpr_one(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp one float bitcast (i32 ptrtoint (i16* @a to i32) to float), bitcast (i32 ptrtoint (i16* @a to i32) to float)
+  ret i1 %cmp
+}

Added: llvm/trunk/test/Transforms/ConstProp/bswap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/bswap.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/bswap.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/bswap.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; bswap should be constant folded when it is passed a constant argument
+
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+
+declare i32 @llvm.bswap.i32(i32)
+
+declare i64 @llvm.bswap.i64(i64)
+
+declare i80 @llvm.bswap.i80(i80)
+
+; CHECK-LABEL: define i16 @W(
+define i16 @W() {
+        ; CHECK: ret i16 256
+        %Z = call i16 @llvm.bswap.i16( i16 1 )          ; <i16> [#uses=1]
+        ret i16 %Z
+}
+
+; CHECK-LABEL: define i32 @X(
+define i32 @X() {
+        ; CHECK: ret i32 16777216
+        %Z = call i32 @llvm.bswap.i32( i32 1 )          ; <i32> [#uses=1]
+        ret i32 %Z
+}
+
+; CHECK-LABEL: define i64 @Y(
+define i64 @Y() {
+        ; CHECK: ret i64 72057594037927936
+        %Z = call i64 @llvm.bswap.i64( i64 1 )          ; <i64> [#uses=1]
+        ret i64 %Z
+}
+
+; CHECK-LABEL: define i80 @Z(
+define i80 @Z() {
+        ; CHECK: ret i80 -450681596205739728166896
+        ;                0xA0908070605040302010
+        %Z = call i80 @llvm.bswap.i80( i80 76151636403560493650080 )
+        ;                                  0x102030405060708090A0
+        ret i80 %Z
+}

Added: llvm/trunk/test/Transforms/ConstProp/calls-math-finite.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/calls-math-finite.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/calls-math-finite.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/calls-math-finite.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+; RUN: opt < %s -constprop -S -mtriple=unknown-unknown-linux-musl | FileCheck -check-prefix=MUSL %s
+
+; Test to verify constant folding can occur when math routines are mapped
+; to the __<func>_finite versions of functions due to __FINITE_MATH_ONLY__
+; being enabled in headers on Linux. All calls should constant fold away
+; in this test.
+
+target triple = "unknown-unknown-linux-gnu"
+
+declare double @__acos_finite(double) #0
+declare float @__acosf_finite(float) #0
+declare double @__asin_finite(double) #0
+declare float @__asinf_finite(float) #0
+declare double @__atan2_finite(double, double) #0
+declare float @__atan2f_finite(float, float) #0
+declare double @__cosh_finite(double) #0
+declare float @__coshf_finite(float) #0
+declare double @__exp2_finite(double) #0
+declare float @__exp2f_finite(float) #0
+declare double @__exp_finite(double) #0
+declare float @__expf_finite(float) #0
+declare double @__log10_finite(double) #0
+declare float @__log10f_finite(float) #0
+declare double @__log_finite(double) #0
+declare float @__logf_finite(float) #0
+declare double @__pow_finite(double, double) #0
+declare float @__powf_finite(float, float) #0
+declare double @__sinh_finite(double) #0
+declare float @__sinhf_finite(float) #0
+
+attributes #0 = { nounwind readnone }
+
+define void @T() {
+; CHECK-LABEL: @T(
+; CHECK-NEXT:    [[SLOT:%.*]] = alloca double
+; CHECK-NEXT:    [[SLOTF:%.*]] = alloca float
+; CHECK-NEXT:    store double 0.000000e+00, double* [[SLOT]]
+; CHECK-NEXT:    store double 0x3FF921FB54442D18, double* [[SLOT]]
+; CHECK-NEXT:    store double 0x3FE4978FA3269EE1, double* [[SLOT]]
+; CHECK-NEXT:    store double 0x402422A497D6185E, double* [[SLOT]]
+; CHECK-NEXT:    store double 0x403415E5BF6FB106, double* [[SLOT]]
+; CHECK-NEXT:    store double 8.000000e+00, double* [[SLOT]]
+; CHECK-NEXT:    store double 0x3FF193EA7AAD030{{[AB]}}, double* [[SLOT]]
+; CHECK-NEXT:    store double 0x3FDE8927964FD5FD, double* [[SLOT]]
+; CHECK-NEXT:    store double 1.000000e+00, double* [[SLOT]]
+; CHECK-NEXT:    store double 0x40240926E70949AE, double* [[SLOT]]
+; CHECK-NEXT:    store float 0.000000e+00, float* [[SLOTF]]
+; CHECK-NEXT:    store float 0x3FF921FB60000000, float* [[SLOTF]]
+; CHECK-NEXT:    store float 0x3FE4978FA0000000, float* [[SLOTF]]
+; CHECK-NEXT:    store float 0x402422A4A0000000, float* [[SLOTF]]
+; CHECK-NEXT:    store float 0x403415E5C0000000, float* [[SLOTF]]
+; CHECK-NEXT:    store float 8.000000e+00, float* [[SLOTF]]
+; CHECK-NEXT:    store float 0x3FF193EA80000000, float* [[SLOTF]]
+; CHECK-NEXT:    store float 0x3FDE8927A0000000, float* [[SLOTF]]
+; CHECK-NEXT:    store float 8.100000e+01, float* [[SLOTF]]
+; CHECK-NEXT:    store float 0x40240926E0000000, float* [[SLOTF]]
+; CHECK-NEXT:    ret void
+;
+; MUSL-LABEL: @T(
+; MUSL-NEXT:    [[SLOT:%.*]] = alloca double
+; MUSL-NEXT:    [[SLOTF:%.*]] = alloca float
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+; MUSL-NEXT:    call
+; MUSL-NEXT:    store
+
+  %slot = alloca double
+  %slotf = alloca float
+
+  %ACOS = call fast double @__acos_finite(double 1.000000e+00)
+  store double %ACOS, double* %slot
+  %ASIN = call fast double @__asin_finite(double 1.000000e+00)
+  store double %ASIN, double* %slot
+  %ATAN2 = call fast double @__atan2_finite(double 3.000000e+00, double 4.000000e+00)
+  store double %ATAN2, double* %slot
+  %COSH = call fast double @__cosh_finite(double 3.000000e+00)
+  store double %COSH, double* %slot
+  %EXP = call fast double @__exp_finite(double 3.000000e+00)
+  store double %EXP, double* %slot
+  %EXP2 = call fast double @__exp2_finite(double 3.000000e+00)
+  store double %EXP2, double* %slot
+  %LOG = call fast double @__log_finite(double 3.000000e+00)
+  store double %LOG, double* %slot
+  %LOG10 = call fast double @__log10_finite(double 3.000000e+00)
+  store double %LOG10, double* %slot
+  %POW = call fast double @__pow_finite(double 1.000000e+00, double 4.000000e+00)
+  store double %POW, double* %slot
+  %SINH = call fast double @__sinh_finite(double 3.000000e+00)
+  store double %SINH, double* %slot
+
+  %ACOSF = call fast float @__acosf_finite(float 1.000000e+00)
+  store float %ACOSF, float* %slotf
+  %ASINF = call fast float @__asinf_finite(float 1.000000e+00)
+  store float %ASINF, float* %slotf
+  %ATAN2F = call fast float @__atan2f_finite(float 3.000000e+00, float 4.000000e+00)
+  store float %ATAN2F, float* %slotf
+  %COSHF = call fast float @__coshf_finite(float 3.000000e+00)
+  store float %COSHF, float* %slotf
+  %EXPF = call fast float @__expf_finite(float 3.000000e+00)
+  store float %EXPF, float* %slotf
+  %EXP2F = call fast float @__exp2f_finite(float 3.000000e+00)
+  store float %EXP2F, float* %slotf
+  %LOGF = call fast float @__logf_finite(float 3.000000e+00)
+  store float %LOGF, float* %slotf
+  %LOG10F = call fast float @__log10f_finite(float 3.000000e+00)
+  store float %LOG10F, float* %slotf
+  %POWF = call fast float @__powf_finite(float 3.000000e+00, float 4.000000e+00)
+  store float %POWF, float* %slotf
+  %SINHF = call fast float @__sinhf_finite(float 3.000000e+00)
+  store float %SINHF, float* %slotf
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/calls.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/calls.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/calls.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,206 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+; RUN: opt < %s -constprop -disable-simplify-libcalls -S | FileCheck %s --check-prefix=FNOBUILTIN
+
+declare double @acos(double) readnone nounwind
+declare double @asin(double) readnone nounwind
+declare double @atan(double) readnone nounwind
+declare double @atan2(double, double) readnone nounwind
+declare double @ceil(double) readnone nounwind
+declare double @cos(double) readnone nounwind
+declare double @cosh(double) readnone nounwind
+declare double @exp(double) readnone nounwind
+declare double @exp2(double) readnone nounwind
+declare double @fabs(double) readnone nounwind
+declare double @floor(double) readnone nounwind
+declare double @fmod(double, double) readnone nounwind
+declare double @log(double) readnone nounwind
+declare double @log10(double) readnone nounwind
+declare double @pow(double, double) readnone nounwind
+declare double @round(double) readnone nounwind
+declare double @sin(double) readnone nounwind
+declare double @sinh(double) readnone nounwind
+declare double @sqrt(double) readnone nounwind
+declare double @tan(double) readnone nounwind
+declare double @tanh(double) readnone nounwind
+
+declare float @acosf(float) readnone nounwind
+declare float @asinf(float) readnone nounwind
+declare float @atanf(float) readnone nounwind
+declare float @atan2f(float, float) readnone nounwind
+declare float @ceilf(float) readnone nounwind
+declare float @cosf(float) readnone nounwind
+declare float @coshf(float) readnone nounwind
+declare float @expf(float) readnone nounwind
+declare float @exp2f(float) readnone nounwind
+declare float @fabsf(float) readnone nounwind
+declare float @floorf(float) readnone nounwind
+declare float @fmodf(float, float) readnone nounwind
+declare float @logf(float) readnone nounwind
+declare float @log10f(float) readnone nounwind
+declare float @powf(float, float) readnone nounwind
+declare float @roundf(float) readnone nounwind
+declare float @sinf(float) readnone nounwind
+declare float @sinhf(float) readnone nounwind
+declare float @sqrtf(float) readnone nounwind
+declare float @tanf(float) readnone nounwind
+declare float @tanhf(float) readnone nounwind
+
+define double @T() {
+; CHECK-LABEL: @T(
+; FNOBUILTIN-LABEL: @T(
+
+; CHECK-NOT: call
+; CHECK: ret
+  %A = call double @cos(double 0.000000e+00)
+  %B = call double @sin(double 0.000000e+00)
+  %a = fadd double %A, %B
+  %C = call double @tan(double 0.000000e+00)
+  %b = fadd double %a, %C
+  %D = call double @sqrt(double 4.000000e+00)
+  %c = fadd double %b, %D
+
+  %slot = alloca double
+  %slotf = alloca float
+; FNOBUILTIN: call
+  %1 = call double @acos(double 1.000000e+00)
+  store double %1, double* %slot
+; FNOBUILTIN: call
+  %2 = call double @asin(double 1.000000e+00)
+  store double %2, double* %slot
+; FNOBUILTIN: call
+  %3 = call double @atan(double 3.000000e+00)
+  store double %3, double* %slot
+; FNOBUILTIN: call
+  %4 = call double @atan2(double 3.000000e+00, double 4.000000e+00)
+  store double %4, double* %slot
+; FNOBUILTIN: call
+  %5 = call double @ceil(double 3.000000e+00)
+  store double %5, double* %slot
+; FNOBUILTIN: call
+  %6 = call double @cosh(double 3.000000e+00)
+  store double %6, double* %slot
+; FNOBUILTIN: call
+  %7 = call double @exp(double 3.000000e+00)
+  store double %7, double* %slot
+; FNOBUILTIN: call
+  %8 = call double @exp2(double 3.000000e+00)
+  store double %8, double* %slot
+; FNOBUILTIN: call
+  %9 = call double @fabs(double 3.000000e+00)
+  store double %9, double* %slot
+; FNOBUILTIN: call
+  %10 = call double @floor(double 3.000000e+00)
+  store double %10, double* %slot
+; FNOBUILTIN: call
+  %11 = call double @fmod(double 3.000000e+00, double 4.000000e+00)
+  store double %11, double* %slot
+; FNOBUILTIN: call
+  %12 = call double @log(double 3.000000e+00)
+  store double %12, double* %slot
+; FNOBUILTIN: call
+  %13 = call double @log10(double 3.000000e+00)
+  store double %13, double* %slot
+; FNOBUILTIN: call
+  %14 = call double @pow(double 3.000000e+00, double 4.000000e+00)
+  store double %14, double* %slot
+; FNOBUILTIN: call
+  %round_val = call double @round(double 3.000000e+00)
+  store double %round_val, double* %slot
+; FNOBUILTIN: call
+  %15 = call double @sinh(double 3.000000e+00)
+  store double %15, double* %slot
+; FNOBUILTIN: call
+  %16 = call double @tanh(double 3.000000e+00)
+  store double %16, double* %slot
+; FNOBUILTIN: call
+  %17 = call float @acosf(float 1.000000e+00)
+  store float %17, float* %slotf
+; FNOBUILTIN: call
+  %18 = call float @asinf(float 1.000000e+00)
+  store float %18, float* %slotf
+; FNOBUILTIN: call
+  %19 = call float @atanf(float 3.000000e+00)
+  store float %19, float* %slotf
+; FNOBUILTIN: call
+  %20 = call float @atan2f(float 3.000000e+00, float 4.000000e+00)
+  store float %20, float* %slotf
+; FNOBUILTIN: call
+  %21 = call float @ceilf(float 3.000000e+00)
+  store float %21, float* %slotf
+; FNOBUILTIN: call
+  %22 = call float @cosf(float 3.000000e+00)
+  store float %22, float* %slotf
+; FNOBUILTIN: call
+  %23 = call float @coshf(float 3.000000e+00)
+  store float %23, float* %slotf
+; FNOBUILTIN: call
+  %24 = call float @expf(float 3.000000e+00)
+  store float %24, float* %slotf
+; FNOBUILTIN: call
+  %25 = call float @exp2f(float 3.000000e+00)
+  store float %25, float* %slotf
+; FNOBUILTIN: call
+  %26 = call float @fabsf(float 3.000000e+00)
+  store float %26, float* %slotf
+; FNOBUILTIN: call
+  %27 = call float @floorf(float 3.000000e+00)
+  store float %27, float* %slotf
+; FNOBUILTIN: call
+  %28 = call float @fmodf(float 3.000000e+00, float 4.000000e+00)
+  store float %28, float* %slotf
+; FNOBUILTIN: call
+  %29 = call float @logf(float 3.000000e+00)
+  store float %29, float* %slotf
+; FNOBUILTIN: call
+  %30 = call float @log10f(float 3.000000e+00)
+  store float %30, float* %slotf
+; FNOBUILTIN: call
+  %31 = call float @powf(float 3.000000e+00, float 4.000000e+00)
+  store float %31, float* %slotf
+; FNOBUILTIN: call
+  %roundf_val = call float @roundf(float 3.000000e+00)
+  store float %roundf_val, float* %slotf
+; FNOBUILTIN: call
+  %32 = call float @sinf(float 3.000000e+00)
+  store float %32, float* %slotf
+; FNOBUILTIN: call
+  %33 = call float @sinhf(float 3.000000e+00)
+  store float %33, float* %slotf
+; FNOBUILTIN: call
+  %34 = call float @sqrtf(float 3.000000e+00)
+  store float %34, float* %slotf
+; FNOBUILTIN: call
+  %35 = call float @tanf(float 3.000000e+00)
+  store float %35, float* %slotf
+; FNOBUILTIN: call
+  %36 = call float @tanhf(float 3.000000e+00)
+  store float %36, float* %slotf
+
+; FNOBUILTIN: ret
+
+  ; PR9315
+  %E = call double @exp2(double 4.0)
+  %d = fadd double %c, %E 
+  ret double %d
+}
+
+define double @test_intrinsic_pow() nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: @test_intrinsic_pow(
+; CHECK-NOT: call
+; CHECK: ret
+  %0 = call double @llvm.pow.f64(double 1.500000e+00, double 3.000000e+00)
+  ret double %0
+}
+
+define float @test_intrinsic_pow_f32_overflow() nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: @test_intrinsic_pow_f32_overflow(
+; CHECK-NOT: call
+; CHECK: ret float 0x7FF0000000000000
+  %0 = call float @llvm.pow.f32(float 40.0, float 50.0)
+  ret float %0
+}
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
+declare float @llvm.pow.f32(float, float) nounwind readonly

Added: llvm/trunk/test/Transforms/ConstProp/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/cast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/cast.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/cast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+; Overflow on a float to int or int to float conversion is undefined (PR21130).
+
+define i8 @overflow_fptosi() {
+; CHECK-LABEL: @overflow_fptosi(
+; CHECK-NEXT:    ret i8 undef
+;
+  %i = fptosi double 1.56e+02 to i8
+  ret i8 %i
+}
+
+define i8 @overflow_fptoui() {
+; CHECK-LABEL: @overflow_fptoui(
+; CHECK-NEXT:    ret i8 undef
+;
+  %i = fptoui double 2.56e+02 to i8
+  ret i8 %i
+}
+
+; The maximum float is approximately 2 ** 128 which is 3.4E38.
+; The constant below is 4E38. Use a 130-bit integer to hold that
+; number: 129 bits for the value + 1 bit for the sign.
+
+define float @overflow_uitofp() {
+; CHECK-LABEL: @overflow_uitofp(
+; CHECK-NEXT:    ret float 0x7FF0000000000000
+;
+  %i = uitofp i130 400000000000000000000000000000000000000 to float
+  ret float %i
+}
+
+define float @overflow_sitofp() {
+; CHECK-LABEL: @overflow_sitofp(
+; CHECK-NEXT:    ret float 0x7FF0000000000000
+;
+  %i = sitofp i130 400000000000000000000000000000000000000 to float
+  ret float %i
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/constant-expr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/constant-expr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/constant-expr.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/constant-expr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,111 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+ at X = external global i8
+ at Y = external global i8
+ at Z = external global i8
+
+ at A = global i1 add (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @A = global i1 xor (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+ at B = global i1 sub (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z)), align 2
+; CHECK: @B = global i1 xor (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+ at C = global i1 mul (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @C = global i1 and (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+
+ at D = global i1 sdiv (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @D = global i1 icmp ult (i8* @X, i8* @Y)
+ at E = global i1 udiv (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @E = global i1 icmp ult (i8* @X, i8* @Y)
+ at F = global i1 srem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @F = global i1 false 
+ at G = global i1 urem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
+; CHECK: @G = global i1 false 
+
+ at H = global i1 icmp ule (i32* bitcast (i8* @X to i32*), i32* bitcast (i8* @Y to i32*))
+; CHECK: @H = global i1 icmp ule (i8* @X, i8* @Y)
+
+ at I = global i1 xor (i1 icmp ult (i8* @X, i8* @Y), i1 false)
+; CHECK: @I = global i1 icmp ult (i8* @X, i8* @Y)
+ at J = global i1 xor (i1 icmp ult (i8* @X, i8* @Y), i1 true)
+; CHECK: @J = global i1 icmp uge (i8* @X, i8* @Y)
+
+ at K = global i1 icmp eq (i1 icmp ult (i8* @X, i8* @Y), i1 false)
+; CHECK: @K = global i1 icmp uge (i8* @X, i8* @Y)
+ at L = global i1 icmp eq (i1 icmp ult (i8* @X, i8* @Y), i1 true)
+; CHECK: @L = global i1 icmp ult (i8* @X, i8* @Y)
+ at M = global i1 icmp ne (i1 icmp ult (i8* @X, i8* @Y), i1 true)
+; CHECK: @M = global i1 icmp uge (i8* @X, i8* @Y)
+ at N = global i1 icmp ne (i1 icmp ult (i8* @X, i8* @Y), i1 false)
+; CHECK: @N = global i1 icmp ult (i8* @X, i8* @Y)
+
+ at O = global i1 icmp eq (i32 zext (i1 icmp ult (i8* @X, i8* @Y) to i32), i32 0)
+; CHECK: @O = global i1 icmp uge (i8* @X, i8* @Y)
+
+
+
+; PR5176
+
+; CHECK: @T1 = global i1 true
+ at T1 = global i1 icmp eq (i64 and (i64 trunc (i256 lshr (i256 or (i256 and (i256 and (i256 shl (i256 zext (i64 ptrtoint (i1* @B to i64) to i256), i256 64), i256 -6277101735386680763495507056286727952638980837032266301441), i256 6277101735386680763835789423207666416102355444464034512895), i256 shl (i256 zext (i64 ptrtoint (i1* @A to i64) to i256), i256 192)), i256 64) to i64), i64 1), i64 0)
+
+; CHECK: @T2 = global i1* @B
+ at T2 = global i1* inttoptr (i64 add (i64 trunc (i256 lshr (i256 or (i256 and (i256 and (i256 shl (i256 zext (i64 ptrtoint (i1* @A to i64) to i256), i256 64), i256 -6277101735386680763495507056286727952638980837032266301441), i256 6277101735386680763835789423207666416102355444464034512895), i256 shl (i256 zext (i64 ptrtoint (i1* @B to i64) to i256), i256 192)), i256 192) to i64), i64 trunc (i256 lshr (i256 or (i256 and (i256 and (i256 shl (i256 zext (i64 ptrtoint (i1* @A to i64) to i256), i256 64), i256 -6277101735386680763495507056286727952638980837032266301441), i256 6277101735386680763835789423207666416102355444464034512895), i256 shl (i256 zext (i64 ptrtoint (i1* @B to i64) to i256), i256 192)), i256 128) to i64)) to i1*)
+
+; CHECK: @T3 = global i64 add (i64 ptrtoint (i1* @B to i64), i64 -1)
+ at T3 = global i64 add (i64 trunc (i256 lshr (i256 or (i256 and (i256 and (i256 shl (i256 zext (i64 ptrtoint (i1* @B to i64) to i256), i256 64), i256 -6277101735386680763495507056286727952638980837032266301441), i256 6277101735386680763835789423207666416102355444464034512895), i256 shl (i256 zext (i64 ptrtoint (i1* @A to i64) to i256), i256 192)), i256 64) to i64), i64 -1)
+
+; CHECK: @T4 = global i1* @B
+ at T4 = global i1* inttoptr (i64 trunc (i256 lshr (i256 or (i256 and (i256 and (i256 shl (i256 zext (i64 ptrtoint (i1* @B to i64) to i256), i256 64), i256 -6277101735386680763495507056286727952638980837032266301441), i256 6277101735386680763835789423207666416102355444464034512895), i256 shl (i256 zext (i64 ptrtoint (i1* @A to i64) to i256), i256 192)), i256 64) to i64) to i1*)
+
+; CHECK: @T5 = global i1* @A
+ at T5 = global i1* inttoptr (i64 add (i64 trunc (i256 lshr (i256 or (i256 and (i256 and (i256 shl (i256 zext (i64 ptrtoint (i1* @B to i64) to i256), i256 64), i256 -6277101735386680763495507056286727952638980837032266301441), i256 6277101735386680763835789423207666416102355444464034512895), i256 shl (i256 zext (i64 ptrtoint (i1* @A to i64) to i256), i256 192)), i256 192) to i64), i64 trunc (i256 lshr (i256 or (i256 and (i256 and (i256 shl (i256 zext (i64 ptrtoint (i1* @B to i64) to i256), i256 64), i256 -6277101735386680763495507056286727952638980837032266301441), i256 6277101735386680763835789423207666416102355444464034512895), i256 shl (i256 zext (i64 ptrtoint (i1* @A to i64) to i256), i256 192)), i256 128) to i64)) to i1*)
+
+
+
+; PR6096
+
+; No check line. This used to crash llvm-as.
+ at T6 = global <2 x i1> fcmp ole (<2 x float> fdiv (<2 x float> undef, <2 x float> <float 1.000000e+00, float 1.000000e+00>), <2 x float> zeroinitializer)
+
+
+; PR9011
+
+ at pr9011_1 = constant <4 x i32> zext (<4 x i8> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_1 = constant <4 x i32> zeroinitializer
+ at pr9011_2 = constant <4 x i32> sext (<4 x i8> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_2 = constant <4 x i32> zeroinitializer
+ at pr9011_3 = constant <4 x i32> bitcast (<16 x i8> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_3 = constant <4 x i32> zeroinitializer
+ at pr9011_4 = constant <4 x float> uitofp (<4 x i8> zeroinitializer to <4 x float>)
+; CHECK: pr9011_4 = constant <4 x float> zeroinitializer
+ at pr9011_5 = constant <4 x float> sitofp (<4 x i8> zeroinitializer to <4 x float>)
+; CHECK: pr9011_5 = constant <4 x float> zeroinitializer
+ at pr9011_6 = constant <4 x i32> fptosi (<4 x float> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_6 = constant <4 x i32> zeroinitializer
+ at pr9011_7 = constant <4 x i32> fptoui (<4 x float> zeroinitializer to <4 x i32>)
+; CHECK: pr9011_7 = constant <4 x i32> zeroinitializer
+ at pr9011_8 = constant <4 x float> fptrunc (<4 x double> zeroinitializer to <4 x float>)
+; CHECK: pr9011_8 = constant <4 x float> zeroinitializer
+ at pr9011_9 = constant <4 x double> fpext (<4 x float> zeroinitializer to <4 x double>)
+; CHECK: pr9011_9 = constant <4 x double> zeroinitializer
+
+ at pr9011_10 = constant <4 x double> bitcast (i256 0 to <4 x double>)
+; CHECK: pr9011_10 = constant <4 x double> zeroinitializer
+ at pr9011_11 = constant <4 x float> bitcast (i128 0 to <4 x float>)
+; CHECK: pr9011_11 = constant <4 x float> zeroinitializer
+ at pr9011_12 = constant <4 x i32> bitcast (i128 0 to <4 x i32>)
+; CHECK: pr9011_12 = constant <4 x i32> zeroinitializer
+ at pr9011_13 = constant i256 bitcast (<4 x double> zeroinitializer to i256)
+; CHECK: pr9011_13 = constant i256 0
+ at pr9011_14 = constant i128 bitcast (<4 x float> zeroinitializer to i128)
+; CHECK: pr9011_14 = constant i128 0
+ at pr9011_15 = constant i128 bitcast (<4 x i32> zeroinitializer to i128)
+; CHECK: pr9011_15 = constant i128 0
+
+ at select = internal constant
+          i32 select (i1 icmp ult (i32 ptrtoint (i8* @X to i32),
+                                   i32 ptrtoint (i8* @Y to i32)),
+            i32 select (i1 icmp ult (i32 ptrtoint (i8* @X to i32),
+                                     i32 ptrtoint (i8* @Y to i32)),
+               i32 10, i32 20),
+            i32 30)
+; CHECK: select = internal constant i32 select {{.*}} i32 10, i32 30

Added: llvm/trunk/test/Transforms/ConstProp/convert-from-fp16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/convert-from-fp16.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/convert-from-fp16.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/convert-from-fp16.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,97 @@
+; RUN: opt -constprop -S < %s | FileCheck %s
+
+; Verify that we don't crash with an assertion failure when constant folding
+; a call to intrinsic 'convert.from.fp16' if the return type is not 'float'.
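+; The intrinsic is overloaded on its result type, so the folder must extend the
+; half value to double, x86_fp80, fp128 and ppc_fp128 as well.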
+
+define float @fold_from_fp16_to_fp32() {
+; CHECK-LABEL: @fold_from_fp16_to_fp32
+; CHECK: ret float 0.000000e+00
+entry:
+  %0 = call float @llvm.convert.from.fp16.f32(i16 0)
+  ret float %0
+}
+
+define double @fold_from_fp16_to_fp64() {
+; CHECK-LABEL: @fold_from_fp16_to_fp64
+; CHECK: ret double 0.000000e+00
+entry:
+  %0 = call double @llvm.convert.from.fp16.f64(i16 0)
+  ret double %0
+}
+
+define x86_fp80 @fold_from_fp16_to_fp80() {
+; CHECK-LABEL: @fold_from_fp16_to_fp80
+; CHECK: ret x86_fp80 0xK00000000000000000000
+entry:
+  %0 = call x86_fp80 @llvm.convert.from.fp16.f80(i16 0)
+  ret x86_fp80 %0
+}
+
+define fp128 @fold_from_fp16_to_fp128() {
+; CHECK-LABEL: @fold_from_fp16_to_fp128
+; CHECK: ret fp128 0xL00000000000000000000000000000000
+entry:
+  %0 = call fp128 @llvm.convert.from.fp16.f128(i16 0)
+  ret fp128 %0
+}
+
+define ppc_fp128 @fold_from_fp16_to_ppcfp128() {
+; CHECK-LABEL: @fold_from_fp16_to_ppcfp128
+; CHECK: ret ppc_fp128 0xM00000000000000000000000000000000
+entry:
+  %0 = call ppc_fp128 @llvm.convert.from.fp16.ppcf128(i16 0)
+  ret ppc_fp128 %0
+}
+
+define float @fold_from_fp16_to_fp32_b() {
+; CHECK-LABEL: @fold_from_fp16_to_fp32_b
+; CHECK: ret float 4.000000e+00
+entry:
+  %0 = call i16 @llvm.convert.to.fp16.f64(double 4.0)
+  %1 = call float @llvm.convert.from.fp16.f32(i16 %0)
+  ret float %1
+}
+
+define double @fold_from_fp16_to_fp64_b() {
+; CHECK-LABEL: @fold_from_fp16_to_fp64_b
+; CHECK: ret double 4.000000e+00
+entry:
+  %0 = call i16 @llvm.convert.to.fp16.f64(double 4.0)
+  %1 = call double @llvm.convert.from.fp16.f64(i16 %0)
+  ret double %1
+}
+
+define x86_fp80 @fold_from_fp16_to_fp80_b() {
+; CHECK-LABEL: @fold_from_fp16_to_fp80_b
+; CHECK: ret x86_fp80 0xK40018000000000000000
+entry:
+  %0 = call i16 @llvm.convert.to.fp16.f64(double 4.0)
+  %1 = call x86_fp80 @llvm.convert.from.fp16.f80(i16 %0)
+  ret x86_fp80 %1
+}
+
+define fp128 @fold_from_fp16_to_fp128_b() {
+; CHECK-LABEL: @fold_from_fp16_to_fp128_b
+; CHECK: ret fp128 0xL00000000000000004001000000000000
+entry:
+  %0 = call i16 @llvm.convert.to.fp16.f64(double 4.0)
+  %1 = call fp128 @llvm.convert.from.fp16.f128(i16 %0)
+  ret fp128 %1
+}
+
+define ppc_fp128 @fold_from_fp16_to_ppcfp128_b() {
+; CHECK-LABEL: @fold_from_fp16_to_ppcfp128_b
+; CHECK: ret ppc_fp128 0xM40100000000000000000000000000000
+entry:
+  %0 = call i16 @llvm.convert.to.fp16.f64(double 4.0)
+  %1 = call ppc_fp128 @llvm.convert.from.fp16.ppcf128(i16 %0)
+  ret ppc_fp128 %1
+}
+
+
+declare i16 @llvm.convert.to.fp16.f64(double)
+declare float @llvm.convert.from.fp16.f32(i16)
+declare double @llvm.convert.from.fp16.f64(i16)
+declare x86_fp80 @llvm.convert.from.fp16.f80(i16)
+declare fp128 @llvm.convert.from.fp16.f128(i16)
+declare ppc_fp128 @llvm.convert.from.fp16.ppcf128(i16)

Added: llvm/trunk/test/Transforms/ConstProp/div-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/div-zero.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/div-zero.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/div-zero.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | grep "ret i32 0"
+; PR4424
+declare void @ext()
+
+define i32 @foo(i32 %ptr) {
+entry:
+        %zero = sub i32 %ptr, %ptr              ; <i32> [#uses=1]
+        %div_zero = sdiv i32 %zero, ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i32)             ; <i32> [#uses=1]
+        ret i32 %div_zero
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/extractvalue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/extractvalue.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/extractvalue.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/extractvalue.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+%struct = type { i32, [4 x i8] }
+
+define i32 @test1() {
+  %A = extractvalue %struct { i32 2, [4 x i8] c"foo\00" }, 0
+  ret i32 %A
+; CHECK-LABEL: @test1(
+; CHECK: ret i32 2
+}
+
+define i8 @test2() {
+  %A = extractvalue %struct { i32 2, [4 x i8] c"foo\00" }, 1, 2
+  ret i8 %A
+; CHECK-LABEL: @test2(
+; CHECK: ret i8 111
+}
+
+define i32 @test3() {
+  %A = extractvalue [3 x %struct] [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], 1, 0
+  ret i32 %A
+; CHECK-LABEL: @test3(
+; CHECK: ret i32 1
+}
+
+define i32 @zeroinitializer-test1() {
+  %A = extractvalue %struct zeroinitializer, 0
+  ret i32 %A
+; CHECK: @zeroinitializer-test1
+; CHECK: ret i32 0
+}
+
+define i8 @zeroinitializer-test2() {
+  %A = extractvalue %struct zeroinitializer, 1, 2
+  ret i8 %A
+; CHECK: @zeroinitializer-test2
+; CHECK: ret i8 0
+}
+
+define i32 @zeroinitializer-test3() {
+  %A = extractvalue [3 x %struct] zeroinitializer, 1, 0
+  ret i32 %A
+; CHECK: @zeroinitializer-test3
+; CHECK: ret i32 0
+}
+
+define i32 @undef-test1() {
+  %A = extractvalue %struct undef, 0
+  ret i32 %A
+; CHECK: @undef-test1
+; CHECK: ret i32 undef
+}
+
+define i8 @undef-test2() {
+  %A = extractvalue %struct undef, 1, 2
+  ret i8 %A
+; CHECK: @undef-test2
+; CHECK: ret i8 undef
+}
+
+define i32 @undef-test3() {
+  %A = extractvalue [3 x %struct] undef, 1, 0
+  ret i32 %A
+; CHECK: @undef-test3
+; CHECK: ret i32 undef
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/float-to-ptr-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/float-to-ptr-cast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/float-to-ptr-cast.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/float-to-ptr-cast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+define i32* @test1() {
+        %X = inttoptr i64 0 to i32*             ; <i32*> [#uses=1]
+        ret i32* %X
+}
+
+; CHECK:  ret i32* null
+
+define i32* @test2() {
+        ret i32* null
+}
+
+; CHECK:  ret i32* null
+

Added: llvm/trunk/test/Transforms/ConstProp/insertvalue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/insertvalue.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/insertvalue.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/insertvalue.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,86 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+%struct = type { i32, [4 x i8] }
+
+define %struct @test1() {
+  %A = insertvalue %struct { i32 2, [4 x i8] c"foo\00" }, i32 1, 0
+  ret %struct %A
+; CHECK-LABEL: @test1(
+; CHECK: ret %struct { i32 1, [4 x i8] c"foo\00" }
+}
+
+define %struct @test2() {
+  %A = insertvalue %struct { i32 2, [4 x i8] c"foo\00" }, i8 1, 1, 2
+  ret %struct %A
+; CHECK-LABEL: @test2(
+; CHECK: ret %struct { i32 2, [4 x i8] c"fo\01\00" }
+}
+
+define [3 x %struct] @test3() {
+  %A = insertvalue [3 x %struct] [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], i32 -1, 1, 0
+  ret [3 x %struct] %A
+; CHECK-LABEL: @test3(
+; CHECK:ret [3 x %struct] [%struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 -1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" }]
+}
+
+define %struct @zeroinitializer-test1() {
+  %A = insertvalue %struct zeroinitializer, i32 1, 0
+  ret %struct %A
+; CHECK: @zeroinitializer-test1
+; CHECK: ret %struct { i32 1, [4 x i8] zeroinitializer }
+}
+
+define %struct @zeroinitializer-test2() {
+  %A = insertvalue %struct zeroinitializer, i8 1, 1, 2
+  ret %struct %A
+; CHECK: @zeroinitializer-test2
+; CHECK: ret %struct { i32 0, [4 x i8] c"\00\00\01\00" }
+}
+
+define [3 x %struct] @zeroinitializer-test3() {
+  %A = insertvalue [3 x %struct] zeroinitializer, i32 1, 1, 0
+  ret [3 x %struct] %A
+; CHECK: @zeroinitializer-test3
+; CHECK: ret [3 x %struct] [%struct zeroinitializer, %struct { i32 1, [4 x i8] zeroinitializer }, %struct zeroinitializer]
+}
+
+define %struct @undef-test1() {
+  %A = insertvalue %struct undef, i32 1, 0
+  ret %struct %A
+; CHECK: @undef-test1
+; CHECK: ret %struct { i32 1, [4 x i8] undef }
+}
+
+define %struct @undef-test2() {
+  %A = insertvalue %struct undef, i8 0, 1, 2
+  ret %struct %A
+; CHECK: @undef-test2
+; CHECK: ret %struct { i32 undef, [4 x i8] [i8 undef, i8 undef, i8 0, i8 undef] }
+}
+
+define [3 x %struct] @undef-test3() {
+  %A = insertvalue [3 x %struct] undef, i32 0, 1, 0
+  ret [3 x %struct] %A
+; CHECK: @undef-test3
+; CHECK: ret [3 x %struct] [%struct undef, %struct { i32 0, [4 x i8] undef }, %struct undef]
+}
+
+define i32 @test-float-Nan() {
+  %A = bitcast i32 2139171423 to float
+  %B = insertvalue [1 x float] undef, float %A, 0
+  %C = extractvalue [1 x float] %B, 0
+  %D = bitcast float %C to i32
+  ret i32 %D
+; CHECK: @test-float-Nan
+; CHECK: ret i32 2139171423
+}
+
+define i16 @test-half-Nan() {
+  %A = bitcast i16 32256 to half
+  %B = insertvalue [1 x half] undef, half %A, 0
+  %C = extractvalue [1 x half] %B, 0
+  %D = bitcast half %C to i16
+  ret i16 %D
+; CHECK: @test-half-Nan
+; CHECK: ret i16 32256
+}

Added: llvm/trunk/test/Transforms/ConstProp/loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/loads.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/loads.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/loads.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,310 @@
+; RUN: opt < %s -data-layout="e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=LE
+; RUN: opt < %s -data-layout="E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=BE
+
+; {{ 0xDEADBEEF, 0xBA }, 0xCAFEBABE}
+ at g1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 }
+ at g2 = constant double 1.0
+; { 0x7B, 0x06B1BFF8 }
+ at g3 = constant {i64, i64} { i64 123, i64 112312312 }
+
+; Simple load
+define i32 @test1() {
+  %r = load i32, i32* getelementptr ({{i32,i8},i32}, {{i32,i8},i32}* @g1, i32 0, i32 0, i32 0)
+  ret i32 %r
+
+; 0xDEADBEEF
+; LE-LABEL: @test1(
+; LE: ret i32 -559038737
+
+; 0xDEADBEEF
+; BE-LABEL: @test1(
+; BE: ret i32 -559038737
+}
+
+; PR3152
+; Load of first 16 bits of 32-bit value.
+define i16 @test2() {
+  %r = load i16, i16* bitcast(i32* getelementptr ({{i32,i8},i32}, {{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*)
+  ret i16 %r
+
+; 0xBEEF
+; LE-LABEL: @test2(
+; LE: ret i16 -16657
+
+; 0xDEAD
+; BE-LABEL: @test2(
+; BE: ret i16 -8531
+}
+
+define i16 @test2_addrspacecast() {
+  %r = load i16, i16 addrspace(1)* addrspacecast(i32* getelementptr ({{i32,i8},i32}, {{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16 addrspace(1)*)
+  ret i16 %r
+
+; FIXME: Should be able to load through a constant addrspacecast.
+; 0xBEEF
+; LE-LABEL: @test2_addrspacecast(
+; XLE: ret i16 -16657
+; LE: load i16, i16 addrspace(1)* addrspacecast
+
+; 0xDEAD
+; BE-LABEL: @test2_addrspacecast(
+; XBE: ret i16 -8531
+; BE: load i16, i16 addrspace(1)* addrspacecast
+}
+
+; Load of second 16 bits of 32-bit value.
+define i16 @test3() {
+  %r = load i16, i16* getelementptr(i16, i16* bitcast(i32* getelementptr ({{i32,i8},i32}, {{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 1)
+  ret i16 %r
+
+; 0xDEAD
+; LE-LABEL: @test3(
+; LE: ret i16 -8531
+
+; 0xBEEF
+; BE-LABEL: @test3(
+; BE: ret i16 -16657
+}
+
+; Load of 8 bit field + tail padding.
+define i16 @test4() {
+  %r = load i16, i16* getelementptr(i16, i16* bitcast(i32* getelementptr ({{i32,i8},i32}, {{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 2)
+  ret i16 %r
+
+; 0x00BA
+; LE-LABEL: @test4(
+; LE: ret i16 186
+
+; 0xBA00
+; BE-LABEL: @test4(
+; BE: ret i16 -17920
+}
+
+; Load of double bits.
+define i64 @test6() {
+  %r = load i64, i64* bitcast(double* @g2 to i64*)
+  ret i64 %r
+
+; 0x3FF_0000000000000
+; LE-LABEL: @test6(
+; LE: ret i64 4607182418800017408
+
+; 0x3FF_0000000000000
+; BE-LABEL: @test6(
+; BE: ret i64 4607182418800017408
+}
+
+; Load of double bits.
+define i16 @test7() {
+  %r = load i16, i16* bitcast(double* @g2 to i16*)
+  ret i16 %r
+
+; 0x0000
+; LE-LABEL: @test7(
+; LE: ret i16 0
+
+; 0x3FF0
+; BE-LABEL: @test7(
+; BE: ret i16 16368
+}
+
+; Double load.
+define double @test8() {
+  %r = load double, double* bitcast({{i32,i8},i32}* @g1 to double*)
+  ret double %r
+
+; LE-LABEL: @test8(
+; LE: ret double 0xBADEADBEEF
+
+; BE-LABEL: @test8(
+; BE: ret double 0xDEADBEEFBA000000
+}
+
+
+; i128 load.
+define i128 @test9() {
+  %r = load i128, i128* bitcast({i64, i64}* @g3 to i128*)
+  ret i128 %r
+
+; 0x00000000_06B1BFF8_00000000_0000007B
+; LE-LABEL: @test9(
+; LE: ret i128 2071796475790618158476296315
+
+; 0x00000000_0000007B_00000000_06B1BFF8
+; BE-LABEL: @test9(
+; BE: ret i128 2268949521066387161080
+}
+
+; vector load.
+define <2 x i64> @test10() {
+  %r = load <2 x i64>, <2 x i64>* bitcast({i64, i64}* @g3 to <2 x i64>*)
+  ret <2 x i64> %r
+
+; LE-LABEL: @test10(
+; LE: ret <2 x i64> <i64 123, i64 112312312>
+
+; BE-LABEL: @test10(
+; BE: ret <2 x i64> <i64 123, i64 112312312>
+}
+
+
+; PR5287
+; { 0xA1, 0x08 }
+ at g4 = internal constant { i8, i8 } { i8 -95, i8 8 }
+
+define i16 @test11() nounwind {
+entry:
+  %a = load i16, i16* bitcast ({ i8, i8 }* @g4 to i16*)
+  ret i16 %a
+
+; 0x08A1
+; LE-LABEL: @test11(
+; LE: ret i16 2209
+
+; 0xA108
+; BE-LABEL: @test11(
+; BE: ret i16 -24312
+}
+
+
+; PR5551
+ at test12g = private constant [6 x i8] c"a\00b\00\00\00"
+
+define i16 @test12() {
+  %a = load i16, i16* getelementptr inbounds ([3 x i16], [3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1)
+  ret i16 %a
+
+; 0x0062
+; LE-LABEL: @test12(
+; LE: ret i16 98
+
+; 0x6200
+; BE-LABEL: @test12(
+; BE: ret i16 25088
+}
+
+
+; PR5978
+ at g5 = constant i8 4
+define i1 @test13() {
+  %A = load i1, i1* bitcast (i8* @g5 to i1*)
+  ret i1 %A
+
+; LE-LABEL: @test13(
+; LE: ret i1 false
+
+; BE-LABEL: @test13(
+; BE: ret i1 false
+}
+
+ at g6 = constant [2 x i8*] [i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*)]
+define i64 @test14() nounwind {
+entry:
+  %tmp = load i64, i64* bitcast ([2 x i8*]* @g6 to i64*)
+  ret i64 %tmp
+
+; LE-LABEL: @test14(
+; LE: ret i64 1
+
+; BE-LABEL: @test14(
+; BE: ret i64 1
+}
+
+; Check with address space pointers
+ at g6_as1 = constant [2 x i8 addrspace(1)*] [i8 addrspace(1)* inttoptr (i16 1 to i8 addrspace(1)*), i8 addrspace(1)* inttoptr (i16 2 to i8 addrspace(1)*)]
+define i16 @test14_as1() nounwind {
+entry:
+  %tmp = load i16, i16* bitcast ([2 x i8 addrspace(1)*]* @g6_as1 to i16*)
+  ret i16 %tmp
+
+; LE: @test14_as1
+; LE: ret i16 1
+
+; BE: @test14_as1
+; BE: ret i16 1
+}
+
+define i64 @test15() nounwind {
+entry:
+  %tmp = load i64, i64* bitcast (i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @g6, i32 0, i64 1) to i64*)
+  ret i64 %tmp
+
+; LE-LABEL: @test15(
+; LE: ret i64 2
+
+; BE-LABEL: @test15(
+; BE: ret i64 2
+}
+
+ at gv7 = constant [4 x i8*] [i8* null, i8* inttoptr (i64 -14 to i8*), i8* null, i8* null]
+define i64 @test16.1() {
+  %v = load i64, i64* bitcast ([4 x i8*]* @gv7 to i64*), align 8
+  ret i64 %v
+
+; LE-LABEL: @test16.1(
+; LE: ret i64 0
+
+; BE-LABEL: @test16.1(
+; BE: ret i64 0
+}
+
+define i64 @test16.2() {
+  %v = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @gv7, i64 0, i64 1) to i64*), align 8
+  ret i64 %v
+
+; LE-LABEL: @test16.2(
+; LE: ret i64 -14
+
+; BE-LABEL: @test16.2(
+; BE: ret i64 -14
+}
+
+define i64 @test16.3() {
+  %v = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @gv7, i64 0, i64 2) to i64*), align 8
+  ret i64 %v
+
+; LE-LABEL: @test16.3(
+; LE: ret i64 0
+
+; BE-LABEL: @test16.3(
+; BE: ret i64 0
+}
+
+ at g7 = constant {[0 x i32], [0 x i8], {}*} { [0 x i32] undef, [0 x i8] undef, {}* null }
+
+define i64* @test_leading_zero_size_elems() {
+  %v = load i64*, i64** bitcast ({[0 x i32], [0 x i8], {}*}* @g7 to i64**)
+  ret i64* %v
+
+; LE-LABEL: @test_leading_zero_size_elems(
+; LE: ret i64* null
+
+; BE-LABEL: @test_leading_zero_size_elems(
+; BE: ret i64* null
+}
+
+ at g8 = constant {[4294967295 x [0 x i32]], i64} { [4294967295 x [0 x i32]] undef, i64 123 }
+
+define i64 @test_leading_zero_size_elems_big() {
+  %v = load i64, i64* bitcast ({[4294967295 x [0 x i32]], i64}* @g8 to i64*)
+  ret i64 %v
+
+; LE-LABEL: @test_leading_zero_size_elems_big(
+; LE: ret i64 123
+
+; BE-LABEL: @test_leading_zero_size_elems_big(
+; BE: ret i64 123
+}
+
+ at g9 = constant [4294967295 x [0 x i32]] zeroinitializer
+
+define i64 @test_array_of_zero_size_array() {
+  %v = load i64, i64* bitcast ([4294967295 x [0 x i32]]* @g9 to i64*)
+  ret i64 %v
+
+; LE-LABEL: @test_array_of_zero_size_array(
+; LE: ret i64 0
+
+; BE-LABEL: @test_array_of_zero_size_array(
+; BE: ret i64 0
+}

Added: llvm/trunk/test/Transforms/ConstProp/logicaltest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/logicaltest.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/logicaltest.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/logicaltest.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; Ensure constant propagation of logical instructions is working correctly.
+
+; RUN: opt < %s -constprop -die -S | FileCheck %s
+; CHECK-NOT:     {{and|or|xor}}
+
+define i32 @test1() {
+        %R = and i32 4, 1234            ; <i32> [#uses=1]
+        ret i32 %R
+}
+
+define i1 @test1.upgrd.1() {
+        %R = and i1 true, false         ; <i1> [#uses=1]
+        ret i1 %R
+}
+
+define i32 @test2() {
+        %R = or i32 4, 1234             ; <i32> [#uses=1]
+        ret i32 %R
+}
+
+define i1 @test2.upgrd.2() {
+        %R = or i1 true, false          ; <i1> [#uses=1]
+        ret i1 %R
+}
+
+define i32 @test3() {
+        %R = xor i32 4, 1234            ; <i32> [#uses=1]
+        ret i32 %R
+}
+
+define i1 @test3.upgrd.3() {
+        %R = xor i1 true, false         ; <i1> [#uses=1]
+        ret i1 %R
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/overflow-ops.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,250 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.umul.with.overflow.i8(i8, i8)
+
+declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
+declare {i8, i1} @llvm.smul.with.overflow.i8(i8, i8)
+
+;;-----------------------------
+;; uadd
+;;-----------------------------
+
+define {i8, i1} @uadd_1() nounwind {
+; CHECK-LABEL: @uadd_1(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -114, i1 false }
+;
+  %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 42, i8 100)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @uadd_2() nounwind {
+; CHECK-LABEL: @uadd_2(
+; CHECK-NEXT:    ret { i8, i1 } { i8 6, i1 true }
+;
+  %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 142, i8 120)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @uadd_undef() nounwind {
+; CHECK-LABEL: @uadd_undef(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 142, i8 undef)
+  ret {i8, i1} %t
+}
+
+;;-----------------------------
+;; usub
+;;-----------------------------
+
+define {i8, i1} @usub_1() nounwind {
+; CHECK-LABEL: @usub_1(
+; CHECK-NEXT:    ret { i8, i1 } { i8 2, i1 false }
+;
+  %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 4, i8 2)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @usub_2() nounwind {
+; CHECK-LABEL: @usub_2(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -2, i1 true }
+;
+  %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 4, i8 6)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @usub_undef() nounwind {
+; CHECK-LABEL: @usub_undef(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %t = call {i8, i1} @llvm.usub.with.overflow.i8(i8 4, i8 undef)
+  ret {i8, i1} %t
+}
+
+;;-----------------------------
+;; umul
+;;-----------------------------
+
+define {i8, i1} @umul_1() nounwind {
+; CHECK-LABEL: @umul_1(
+; CHECK-NEXT:    ret { i8, i1 } { i8 44, i1 true }
+;
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 100, i8 3)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @umul_2() nounwind {
+; CHECK-LABEL: @umul_2(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -56, i1 false }
+;
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 100, i8 2)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @umul_undef() nounwind {
+; CHECK-LABEL: @umul_undef(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 2)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @umul_both_undef() nounwind {
+; CHECK-LABEL: @umul_both_undef(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
+  ret {i8, i1} %t
+}
+
+;;-----------------------------
+;; sadd
+;;-----------------------------
+
+define {i8, i1} @sadd_1() nounwind {
+; CHECK-LABEL: @sadd_1(
+; CHECK-NEXT:    ret { i8, i1 } { i8 44, i1 false }
+;
+  %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 42, i8 2)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @sadd_2() nounwind {
+; CHECK-LABEL: @sadd_2(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -126, i1 true }
+;
+  %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 120, i8 10)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @sadd_3() nounwind {
+; CHECK-LABEL: @sadd_3(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -110, i1 false }
+;
+  %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 -120, i8 10)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @sadd_4() nounwind {
+; CHECK-LABEL: @sadd_4(
+; CHECK-NEXT:    ret { i8, i1 } { i8 126, i1 true }
+;
+  %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 -120, i8 -10)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @sadd_5() nounwind {
+; CHECK-LABEL: @sadd_5(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -8, i1 false }
+;
+  %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 2, i8 -10)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @sadd_undef() nounwind {
+; CHECK-LABEL: @sadd_undef(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 undef, i8 -10)
+  ret {i8, i1} %t
+}
+
+;;-----------------------------
+;; ssub
+;;-----------------------------
+
+define {i8, i1} @ssub_1() nounwind {
+; CHECK-LABEL: @ssub_1(
+; CHECK-NEXT:    ret { i8, i1 } { i8 2, i1 false }
+;
+  %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 4, i8 2)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @ssub_2() nounwind {
+; CHECK-LABEL: @ssub_2(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -2, i1 false }
+;
+  %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 4, i8 6)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @ssub_3() nounwind {
+; CHECK-LABEL: @ssub_3(
+; CHECK-NEXT:    ret { i8, i1 } { i8 126, i1 true }
+;
+  %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -10, i8 120)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @ssub_3b() nounwind {
+; CHECK-LABEL: @ssub_3b(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -20, i1 false }
+;
+  %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -10, i8 10)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @ssub_4() nounwind {
+; CHECK-LABEL: @ssub_4(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -126, i1 true }
+;
+  %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 120, i8 -10)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @ssub_4b() nounwind {
+; CHECK-LABEL: @ssub_4b(
+; CHECK-NEXT:    ret { i8, i1 } { i8 30, i1 false }
+;
+  %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 20, i8 -10)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @ssub_5() nounwind {
+; CHECK-LABEL: @ssub_5(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -10, i1 false }
+;
+  %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 -20, i8 -10)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @ssub_undef() nounwind {
+; CHECK-LABEL: @ssub_undef(
+; CHECK-NEXT:    ret { i8, i1 } undef
+;
+  %t = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 undef, i8 -10)
+  ret {i8, i1} %t
+}
+
+;;-----------------------------
+;; smul
+;;-----------------------------
+
+define {i8, i1} @smul_1() nounwind {
+; CHECK-LABEL: @smul_1(
+; CHECK-NEXT:    ret { i8, i1 } { i8 -56, i1 true }
+;
+  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 -20, i8 -10)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @smul_undef() nounwind {
+; CHECK-LABEL: @smul_undef(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 -20, i8 undef)
+  ret {i8, i1} %t
+}
+
+define {i8, i1} @smul_both_undef() nounwind {
+; CHECK-LABEL: @smul_both_undef(
+; CHECK-NEXT:    ret { i8, i1 } zeroinitializer
+;
+  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
+  ret {i8, i1} %t
+}

Added: llvm/trunk/test/Transforms/ConstProp/phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/phi.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,17 @@
+; This is a basic sanity check for constant propagation.  The phi instruction,
+; whose incoming values are both the constant 1, should be eliminated.
+
+; RUN: opt < %s -constprop -die -S | not grep phi
+
+define i32 @test(i1 %B) {
+BB0:
+        br i1 %B, label %BB1, label %BB3
+
+BB1:            ; preds = %BB0
+        br label %BB3
+
+BB3:            ; preds = %BB1, %BB0
+        %Ret = phi i32 [ 1, %BB0 ], [ 1, %BB1 ]         ; <i32> [#uses=1]
+        ret i32 %Ret
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/remtest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/remtest.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/remtest.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/remtest.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; Ensure constant propagation of remainder instructions is working correctly.
+
+; RUN: opt < %s -constprop -die -S | not grep rem
+
+define i32 @test1() {
+        %R = srem i32 4, 3              ; <i32> [#uses=1]
+        ret i32 %R
+}
+
+define i32 @test2() {
+        %R = srem i32 123, -23          ; <i32> [#uses=1]
+        ret i32 %R
+}
+
+define float @test3() {
+        %R = frem float 0x4028E66660000000, 0x405ECDA1C0000000          ; <float> [#uses=1]
+        ret float %R
+}
+
+define double @test4() {
+        %R = frem double 0x4073833BEE07AFF8, 0x4028AAABB2A0D19C         ; <double> [#uses=1]
+        ret double %R
+}
+

Added: llvm/trunk/test/Transforms/ConstProp/shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/shift.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/shift.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/shift.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,69 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+; CHECK-LABEL: shift_undef_64
+define void @shift_undef_64(i64* %p) {
+  %r1 = lshr i64 -1, 4294967296 ; 2^32
+  ; CHECK: store i64 undef
+  store i64 %r1, i64* %p
+
+  %r2 = ashr i64 -1, 4294967297 ; 2^32 + 1
+  ; CHECK: store i64 undef
+  store i64 %r2, i64* %p
+
+  %r3 = shl i64 -1, 4294967298 ; 2^32 + 2
+  ; CHECK: store i64 undef
+  store i64 %r3, i64* %p
+
+  ret void
+}
+
+; CHECK-LABEL: shift_undef_65
+define void @shift_undef_65(i65* %p) {
+  %r1 = lshr i65 2, 18446744073709551617
+  ; CHECK: store i65 undef
+  store i65 %r1, i65* %p
+
+  %r2 = ashr i65 4, 18446744073709551617
+  ; CHECK: store i65 undef
+  store i65 %r2, i65* %p
+
+  %r3 = shl i65 1, 18446744073709551617
+  ; CHECK: store i65 undef
+  store i65 %r3, i65* %p
+
+  ret void
+}
+
+; CHECK-LABEL: shift_undef_256
+define void @shift_undef_256(i256* %p) {
+  %r1 = lshr i256 2, 18446744073709551617
+  ; CHECK: store i256 undef
+  store i256 %r1, i256* %p
+
+  %r2 = ashr i256 4, 18446744073709551618
+  ; CHECK: store i256 undef
+  store i256 %r2, i256* %p
+
+  %r3 = shl i256 1, 18446744073709551619
+  ; CHECK: store i256 undef
+  store i256 %r3, i256* %p
+
+  ret void
+}
+
+; CHECK-LABEL: shift_undef_511
+define void @shift_undef_511(i511* %p) {
+  %r1 = lshr i511 -1, 1208925819614629174706276 ; 2^80 + 100
+  ; CHECK: store i511 undef
+  store i511 %r1, i511* %p
+
+  %r2 = ashr i511 -2, 1208925819614629174706200
+  ; CHECK: store i511 undef
+  store i511 %r2, i511* %p
+
+  %r3 = shl i511 -3, 1208925819614629174706180
+  ; CHECK: store i511 undef
+  store i511 %r3, i511* %p
+
+  ret void
+}

Added: llvm/trunk/test/Transforms/ConstProp/sse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/sse.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/sse.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/sse.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,208 @@
+; RUN: opt < %s -constprop -S | FileCheck %s
+; REQUIRES: x86-registered-target
+
+define i1 @test_sse_cvts_exact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 7.0, double undef>) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 7.0, double undef>) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 10
+  %cmp13 = icmp eq i64 %sum13, 10
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; Inexact values should not fold as they are dependent on rounding mode
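+; For example, cvtss2si(1.75) yields 2 under the default round-to-nearest mode,
+; but a different runtime rounding mode could yield 1.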
+define i1 @test_sse_cvts_inexact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_inexact(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 1.75, double undef>) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 1.75, double undef>) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 4
+  %cmp13 = icmp eq i64 %sum13, 4
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_sse_cvts_max() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_sse_cvts_inf() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_sse_cvts_nan() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+define i1 @test_sse_cvtts_exact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 7.0, double undef>) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 7.0, double undef>) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 10
+  %cmp13 = icmp eq i64 %sum13, 10
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
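+; Truncating conversions always round toward zero, independent of the runtime
+; rounding mode, so inexact values do fold here (1.75 becomes 1).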
+define i1 @test_sse_cvtts_inexact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_inexact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 1.75, double undef>) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 1.75, double undef>) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %cmp02 = icmp eq i32 %sum02, 2
+  %cmp13 = icmp eq i64 %sum13, 2
+  %b = and i1 %cmp02, %cmp13
+  ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_sse_cvtts_max() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_sse_cvtts_inf() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_sse_cvtts_nan() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+  %fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
+  %dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
+  %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
+  %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
+  %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
+  %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
+  %sum02 = add i32 %i0, %i2
+  %sum13 = add i64 %i1, %i3
+  %sum02.sext = sext i32 %sum02 to i64
+  %b = icmp eq i64 %sum02.sext, %sum13
+  ret i1 %b
+}
+
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

Added: llvm/trunk/test/Transforms/ConstProp/trunc_vec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstProp/trunc_vec.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstProp/trunc_vec.ll (added)
+++ llvm/trunk/test/Transforms/ConstProp/trunc_vec.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,9 @@
+; RUN: opt -constprop < %s
+
+; Make sure we don't crash on this one
+
+define <8 x i8> @test_truc_vec() {
+  %x = bitcast <2 x i64> <i64 1, i64 2> to <8 x i16>
+  %y = trunc <8 x i16> %x to <8 x i8>
+  ret <8 x i8> %y
+}

Added: llvm/trunk/test/Transforms/ConstantHoisting/AArch64/const-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/AArch64/const-addr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/AArch64/const-addr.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/AArch64/const-addr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -mtriple=arm64-darwin-unknown -S -consthoist < %s | FileCheck %s
+
+%T = type { i32, i32, i32, i32 }
+
+define i32 @test1() nounwind {
+; CHECK-LABEL: test1
+; CHECK: %const = bitcast i64 68141056 to i64
+; CHECK: %1 = inttoptr i64 %const to %T*
+; CHECK: %o1 = getelementptr %T, %T* %1, i32 0, i32 1
+; CHECK: %o2 = getelementptr %T, %T* %1, i32 0, i32 2
+; CHECK: %o3 = getelementptr %T, %T* %1, i32 0, i32 3
+  %at = inttoptr i64 68141056 to %T*
+  %o1 = getelementptr %T, %T* %at, i32 0, i32 1
+  %t1 = load i32, i32* %o1
+  %o2 = getelementptr %T, %T* %at, i32 0, i32 2
+  %t2 = load i32, i32* %o2
+  %a1 = add i32 %t1, %t2
+  %o3 = getelementptr %T, %T* %at, i32 0, i32 3
+  %t3 = load i32, i32* %o3
+  %a2 = add i32 %a1, %t3
+  ret i32 %a2
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/AArch64/const-hoist-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/AArch64/const-hoist-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/AArch64/const-hoist-gep.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/AArch64/const-hoist-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt -consthoist -consthoist-gep -S -o - %s | FileCheck %s
+
+target triple = "aarch64-none--musleabi"
+
+; Check that constant GEP expressions are rewritten to one-dimensional
+; (single-index) GEPs, whose base pointer is a multi-dimensional GEP.
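+; The remaining accesses are rewritten as byte offsets +4, +160 and +164 from
+; that hoisted base, as the CHECK lines below show.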
+; CHECK:  %const = bitcast i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0) to i32*
+; CHECK-NEXT:  store i32 undef, i32* %const, align 4
+
+; CHECK-NEXT:  %[[BC1:[a-z0-9_]+]] = bitcast i32* %const to i8*
+; CHECK-NEXT:  %[[M1:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC1]], i32 4
+; CHECK-NEXT:  %[[BC2:[a-z0-9_]+]] = bitcast i8* %[[M1]] to i32*
+; CHECK-NEXT:  store i32 undef, i32* %[[BC2]], align 4
+
+; CHECK-NEXT:  %[[BC3:[a-z0-9_]+]] = bitcast i32* %const to i8*
+; CHECK-NEXT:  %[[M2:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC3]], i32 160
+; CHECK-NEXT:  %[[BC4:[a-z0-9_]+]] = bitcast i8* %[[M2]] to i32*
+; CHECK-NEXT:  store i32 undef, i32* %[[BC4]], align 4
+
+; CHECK-NEXT:  %[[BC5:[a-z0-9_]+]] = bitcast i32* %const to i8*
+; CHECK-NEXT:  %[[M3:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC5]], i32 164
+; CHECK-NEXT:  %[[BC6:[a-z0-9_]+]] = bitcast i8* %[[M3]] to i32*
+; CHECK-NEXT:  store i32 undef, i32* %[[BC6]], align 4
+
+%0 = type { %1, %2, [9 x i16], %6, %7 }
+%1 = type { i32, i32, i32, i32, i32, i32, i16, i16, i8, i8, i16, i32, i32, i16, i8, i8 }
+%2 = type { i32, %3, i8, i8, i8, i8, i32, %4, %5, [16 x i8], i16, i16, i8, i8, i8, i8, i32, i32, i32 }
+%3 = type { i16, i8, i8 }
+%4 = type { i16, i8, i8 }
+%5 = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+%6 = type { i8, i8 }
+%7 = type { [5 x i32], [3 x i32], [6 x i32], [3 x i32], [2 x i32], [4 x i32], [3 x i32], [2 x i32], [4 x i32], [5 x i32], [3 x i32], [6 x i32], [1 x i32], i32, i32, i32, i32, i32, i32 }
+
+ at global = external dso_local local_unnamed_addr global %0, align 4
+
+define dso_local void @zot() {
+bb:
+  store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0), align 4
+  store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 1), align 4
+  store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 0), align 4
+  store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 1), align 4
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt -mtriple=arm64-darwin-unknown -S -consthoist < %s | FileCheck %s
+
+define i128 @test1(i128 %a) nounwind {
+; CHECK-LABEL: test1
+; CHECK: %const = bitcast i128 12297829382473034410122878 to i128
+  %1 = add i128 %a, 12297829382473034410122878
+  %2 = add i128 %1, 12297829382473034410122878
+  ret i128 %2
+}
+
+; Check that we don't hoist large but cheap constants.
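+; A small value such as 7 encodes directly in the and/or immediates, so
+; rematerializing it at each use costs nothing.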
+define i512 @test2(i512 %a) nounwind {
+; CHECK-LABEL: test2
+; CHECK-NOT: %const = bitcast i512 7 to i512
+  %1 = and i512 %a, 7
+  %2 = or i512 %1, 7
+  ret i512 %2
+}
+
+; Check that we don't hoist the shift value of a shift instruction.
+define i512 @test3(i512 %a) nounwind {
+; CHECK-LABEL: test3
+; CHECK-NOT: %const = bitcast i512 504 to i512
+  %1 = shl i512 %a, 504
+  %2 = ashr i512 %1, 504
+  ret i512 %2
+}

Added: llvm/trunk/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/AArch64/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True

Added: llvm/trunk/test/Transforms/ConstantHoisting/ARM/bad-cases.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/bad-cases.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/bad-cases.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/bad-cases.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,140 @@
+; RUN: opt -consthoist -S < %s | FileCheck %s
+target triple = "thumbv6m-none-eabi"
+
+; Allocas in the entry block get handled (for free) by
+; prologue/epilogue. Elsewhere they're fair game though.
+define void @avoid_allocas() {
+; CHECK-LABEL: @avoid_allocas
+; CHECK: %addr1 = alloca i8, i32 1000
+; CHECK: %addr2 = alloca i8, i32 1020
+
+  %addr1 = alloca i8, i32 1000
+  %addr2 = alloca i8, i32 1020
+  br label %elsewhere
+
+elsewhere:
+; CHECK: [[BASE:%.*]] = bitcast i32 1000 to i32
+; CHECK: alloca i8, i32 [[BASE]]
+; CHECK: [[NEXT:%.*]] = add i32 [[BASE]], 20
+; CHECK: alloca i8, i32 [[NEXT]]
+
+  %addr3 = alloca i8, i32 1000
+  %addr4 = alloca i8, i32 1020
+
+  ret void
+}
+
+; The case values of switch instructions are required to be constants.
+define void @avoid_switch(i32 %in) {
+; CHECK-LABEL: @avoid_switch
+; CHECK:   switch i32 %in, label %default [
+; CHECK:       i32 1000, label %bb1
+; CHECK:       i32 1020, label %bb2
+; CHECK:   ]
+
+  switch i32 %in, label %default
+      [ i32 1000, label %bb1
+        i32 1020, label %bb2 ]
+
+bb1:
+  ret void
+
+bb2:
+  ret void
+
+default:
+  ret void
+}
+
+; We don't want to convert constant divides because the benefit from converting
+; them to a mul in the backend is larger than the constant materialization savings.
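+; Lowering a divide by a constant to a multiply-plus-shift sequence removes the
+; runtime division entirely, which outweighs re-materializing the constant at each use.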
+define void @signed_const_division(i32 %in1, i32 %in2, i32* %addr) {
+; CHECK-LABEL: @signed_const_division
+; CHECK: %res1 = sdiv i32 %l1, 1000000000
+; CHECK: %res2 = srem i32 %l2, 1000000000
+entry:
+  br label %loop
+
+loop:
+  %l1 = phi i32 [%res1, %loop], [%in1, %entry]
+  %l2 = phi i32 [%res2, %loop], [%in2, %entry]
+  %res1 = sdiv i32 %l1, 1000000000
+  store volatile i32 %res1, i32* %addr
+  %res2 = srem i32 %l2, 1000000000
+  store volatile i32 %res2, i32* %addr
+  %again = icmp eq i32 %res1, %res2
+  br i1 %again, label %loop, label %end
+
+end:
+  ret void
+}
+
+define void @unsigned_const_division(i32 %in1, i32 %in2, i32* %addr) {
+; CHECK-LABEL: @unsigned_const_division
+; CHECK: %res1 = udiv i32 %l1, 1000000000
+; CHECK: %res2 = urem i32 %l2, 1000000000
+
+entry:
+  br label %loop
+
+loop:
+  %l1 = phi i32 [%res1, %loop], [%in1, %entry]
+  %l2 = phi i32 [%res2, %loop], [%in2, %entry]
+  %res1 = udiv i32 %l1, 1000000000
+  store volatile i32 %res1, i32* %addr
+  %res2 = urem i32 %l2, 1000000000
+  store volatile i32 %res2, i32* %addr
+  %again = icmp eq i32 %res1, %res2
+  br i1 %again, label %loop, label %end
+
+end:
+  ret void
+}
+
+;PR 28282: even when the data type is wider than 64 bits, the bit width of the
+;constant operand can be smaller than 64 bits. In that case, there is no
+;benefit in hoisting the constant.
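+;Here the shift amount 32 is a cheap, small constant even though the operands
+;are i96, so hoisting it would save nothing.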
+define i32 @struct_type_test(i96 %a0, i96 %a1) {
+;CHECK-LABEL: @struct_type_test
+entry:
+;CHECK-NOT: %const = bitcast i96 32 to i96
+;CHECK: lshr0 = lshr i96 %a0, 32
+  %lshr0 = lshr i96 %a0, 32
+  %cast0 = trunc i96 %lshr0 to i32
+;CHECK: lshr1 = lshr i96 %a1, 32
+  %lshr1 = lshr i96 %a1, 32
+  %cast1 = trunc i96 %lshr1 to i32
+  %ret = add i32 %cast0, %cast1
+  ret i32 %ret
+}
+
+ at exception_type = external global i8
+
+; Constants in inline ASM should not be hoisted.
+define i32 @inline_asm_invoke() personality i8* null {
+;CHECK-LABEL: @inline_asm_invoke
+;CHECK-NOT: %const = 214672
+;CHECK: %X = invoke i32 asm "bswap $0", "=r,r"(i32 214672)
+  %X = invoke i32 asm "bswap $0", "=r,r"(i32 214672)
+                  to label %L unwind label %lpad
+;CHECK: %Y = invoke i32 asm "bswap $0", "=r,r"(i32 214672)
+  %Y = invoke i32 asm "bswap $0", "=r,r"(i32 214672)
+                  to label %L unwind label %lpad
+L:
+  ret i32 %X
+lpad:
+  %lp = landingpad i32
+      cleanup
+      catch i8* @exception_type
+  ret i32 1
+}
+
+define i32 @inline_asm_call() {
+;CHECK-LABEL: @inline_asm_call
+;CHECK-NOT: %const = 214672
+;CHECK: %X = call i32 asm "bswap $0", "=r,r"(i32 214672)
+  %X = call i32 asm "bswap $0", "=r,r"(i32 214672)
+;CHECK: %Y = call i32 asm "bswap $0", "=r,r"(i32 214672)
+  %Y = call i32 asm "bswap $0", "=r,r"(i32 214672)
+  ret i32 %X
+}

Added: llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,101 @@
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso -S < %s | FileCheck %s -check-prefix=PGSO
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
+
+; There are different candidates here for the base constant: 1073876992 and
+; 1073876996. But we don't want to see the latter because it results in
+; negative offsets.
+
+define void @foo() #0 {
+entry:
+; CHECK-LABEL: @foo
+; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
+; CHECK-LABEL: @foo_pgso
+  %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %or = or i32 %0, 1
+  store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and = and i32 %1, -117506048
+  store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and1 = and i32 %2, -17367041
+  store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and2 = and i32 %3, -262145
+  store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and3 = and i32 %4, -8323073
+  store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
+  store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
+  %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %or4 = or i32 %5, 65536
+  store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or6.i.i = or i32 %6, 16
+  store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %and7.i.i = and i32 %7, -4
+  store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or8.i.i = or i32 %8, 2
+  store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  ret void
+}
+
+attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
+
+define void @foo_pgso() #1 !prof !14 {
+entry:
+; PGSO-LABEL: @foo_pgso
+; PGSO-NOT: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
+; NPGSO-LABEL: @foo_pgso
+; NPGSO: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
+  %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %or = or i32 %0, 1
+  store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and = and i32 %1, -117506048
+  store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and1 = and i32 %2, -17367041
+  store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and2 = and i32 %3, -262145
+  store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and3 = and i32 %4, -8323073
+  store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
+  store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
+  %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %or4 = or i32 %5, 65536
+  store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or6.i.i = or i32 %6, 16
+  store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %and7.i.i = and i32 %7, -4
+  store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or8.i.i = or i32 %8, 2
+  store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  ret void
+}
+
+attributes #1 = { norecurse nounwind readnone uwtable }  ; no optsize or minsize
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
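
The point of the base-constant choice above can be read as a small rebasing
decision. A minimal sketch of the preferred outcome, with illustrative value
names and the two addresses taken from the test: keeping 1073876992 as the
hoisted base lets the neighbouring access be written as base plus 4, whereas
rebasing on 1073876996 would force an add of -4, which is exactly what the
CHECK-NOT lines guard against.

  define void @rebase_sketch() {
    %base = bitcast i32 1073876992 to i32   ; hoisted base constant
    %p0   = inttoptr i32 %base to i32*
    %v0   = load volatile i32, i32* %p0, align 4
    %off  = add i32 %base, 4                ; 1073876996 expressed as base + 4
    %p1   = inttoptr i32 %off to i32*
    %v1   = load volatile i32, i32* %p1, align 4
    ret void
  }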

Added: llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-hoist-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-hoist-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-hoist-gep.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-hoist-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt -consthoist -consthoist-gep -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-none--musleabi"
+
+; Check that constant GEP expressions are rewritten to one-dimensional
+; (single-index) GEPs, whose base pointer is a multi-dimensional GEP.
+; CHECK-DAG:  %[[C1:const[0-9]?]] = bitcast i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 0) to i32*
+; CHECK-DAG:  %[[C2:const[0-9]?]] = bitcast i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0) to i32*
+
+; CHECK:  store i32 undef, i32* %[[C2]], align 4
+; CHECK-NEXT:  %[[BC1:[a-z0-9_]+]] = bitcast i32* %[[C2]] to i8*
+; CHECK-NEXT:  %[[M1:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC1]], i32 4
+; CHECK-NEXT:  %[[BC2:[a-z0-9_]+]] = bitcast i8* %[[M1]] to i32*
+; CHECK-NEXT:  store i32 undef, i32* %[[BC2]], align 4
+
+; CHECK-NEXT:  store i32 undef, i32* %[[C1]], align 4
+; CHECK-NEXT:  %[[BC3:[a-z0-9_]+]] = bitcast i32* %[[C1]] to i8*
+; CHECK-NEXT:  %[[M2:[a-z0-9_]+]] = getelementptr i8, i8* %[[BC3]], i32 4
+; CHECK-NEXT:  %[[BC4:[a-z0-9_]+]] = bitcast i8* %[[M2]] to i32*
+; CHECK-NEXT:  store i32 undef, i32* %[[BC4]], align 4
+
+%0 = type { %1, %2, [9 x i16], %6, %7 }
+%1 = type { i32, i32, i32, i32, i32, i32, i16, i16, i8, i8, i16, i32, i32, i16, i8, i8 }
+%2 = type { i32, %3, i8, i8, i8, i8, i32, %4, %5, [16 x i8], i16, i16, i8, i8, i8, i8, i32, i32, i32 }
+%3 = type { i16, i8, i8 }
+%4 = type { i16, i8, i8 }
+%5 = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+%6 = type { i8, i8 }
+%7 = type { [5 x i32], [3 x i32], [6 x i32], [3 x i32], [2 x i32], [4 x i32], [3 x i32], [2 x i32], [4 x i32], [5 x i32], [3 x i32], [6 x i32], [1 x i32], i32, i32, i32, i32, i32, i32 }
+
+ at global = external dso_local local_unnamed_addr global %0, align 4
+
+define dso_local void @zot() {
+bb:
+  store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 0), align 4
+  store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 0, i32 1), align 4
+  store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 0), align 4
+  store i32 undef, i32* getelementptr inbounds (%0, %0* @global, i32 0, i32 4, i32 11, i32 1), align 4
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/gep-struct-index.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt -consthoist -S < %s | FileCheck %s
+target triple = "thumbv6m-none-eabi"
+
+%T = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32 }
+
+; Indices for GEPs that index into a struct type should not be hoisted.
+define i32 @test1(%T* %P) nounwind {
+; CHECK-LABEL:  @test1
+; CHECK:        %const = bitcast i32 256 to i32
+; CHECK:        %addr1 = getelementptr %T, %T* %P, i32 %const, i32 256
+; CHECK:        %addr2 = getelementptr %T, %T* %P, i32 %const, i32 256
+; The first index into the pointer is hoisted, but the second one into the
+; struct isn't.
+  %addr1 = getelementptr %T, %T* %P, i32 256, i32 256
+  %tmp1 = load i32, i32* %addr1
+  %addr2 = getelementptr %T, %T* %P, i32 256, i32 256
+  %tmp2 = load i32, i32* %addr2
+  %tmp4 = add i32 %tmp1, %tmp2
+  ret i32 %tmp4
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/ARM/insertvalue.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/insertvalue.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/insertvalue.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/insertvalue.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt -consthoist -S < %s | FileCheck %s
+target triple = "thumbv6m-none-eabi"
+
+%T = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+i32, i32, i32, i32, i32, i32 }
+
+; The second operand of insertvalue (the inserted value) can be hoisted.
+define void @test1(%T %P) {
+; CHECK-LABEL:  @test1
+; CHECK:        %const = bitcast i32 256 to i32
+; CHECK:        %1 = insertvalue %T %P, i32 %const, 256
+; CHECK:        %2 = insertvalue %T %P, i32 %const, 256
+  %1 = insertvalue %T %P, i32 256, 256
+  %2 = insertvalue %T %P, i32 256, 256
+  ret void
+}

Added: llvm/trunk/test/Transforms/ConstantHoisting/ARM/is-legal-addressing-imm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/is-legal-addressing-imm.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/is-legal-addressing-imm.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/is-legal-addressing-imm.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,120 @@
+; RUN: opt -consthoist -S -o - %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-none--musleabi"
+
+; Check that for i8 type, the maximum legal offset is 31.
+; Also check that a constant used as the value to be stored, rather than as the
+; pointer operand, in a store instruction is hoisted.
+; CHECK: foo_i8
+; CHECK-DAG:  %[[C1:const[0-9]?]] = bitcast i32 805874720 to i32
+; CHECK-DAG:  %[[C2:const[0-9]?]] = bitcast i32 805874688 to i32
+; CHECK-DAG:  %[[C3:const[0-9]?]] = bitcast i32 805873720 to i32
+; CHECK-DAG:  %[[C4:const[0-9]?]] = bitcast i32 805873688 to i32
+; CHECK:  %0 = inttoptr i32 %[[C2]] to i8*
+; CHECK-NEXT:  %1 = load volatile i8, i8* %0
+; CHECK-NEXT:  %[[M1:const_mat[0-9]?]] = add i32 %[[C2]], 4
+; CHECK-NEXT:  %2 = inttoptr i32 %[[M1]] to i8*
+; CHECK-NEXT:  %3 = load volatile i8, i8* %2
+; CHECK-NEXT:  %[[M2:const_mat[0-9]?]] = add i32 %[[C2]], 31
+; CHECK-NEXT:  %4 = inttoptr i32 %[[M2]] to i8*
+; CHECK-NEXT:  %5 = load volatile i8, i8* %4
+; CHECK-NEXT:  %6 = inttoptr i32 %[[C1]] to i8*
+; CHECK-NEXT:  %7 = load volatile i8, i8* %6
+; CHECK-NEXT:  %[[M3:const_mat[0-9]?]] = add i32 %[[C1]], 7
+; CHECK-NEXT:  %8 = inttoptr i32 %[[M3]] to i8*
+; CHECK-NEXT:  %9 = load volatile i8, i8* %8
+; CHECK-NEXT:  %10 = inttoptr i32 %[[C4]] to i8*
+; CHECK-NEXT:  store i8 %9, i8* %10
+; CHECK-NEXT:  %[[M4:const_mat[0-9]?]] = add i32 %[[C4]], 31
+; CHECK-NEXT:  %11 = inttoptr i32 %[[M4]] to i8*
+; CHECK-NEXT:  store i8 %7, i8* %11
+; CHECK-NEXT:  %12 = inttoptr i32 %[[C3]] to i8*
+; CHECK-NEXT:  store i8 %5, i8* %12
+; CHECK-NEXT:  %[[M5:const_mat[0-9]?]] = add i32 %[[C3]], 7
+; CHECK-NEXT:  %13 = inttoptr i32 %[[M5]] to i8*
+; CHECK-NEXT:  store i8 %3, i8* %13
+; CHECK-NEXT:  %[[M6:const_mat[0-9]?]] = add i32 %[[C1]], 80
+; CHECK-NEXT:  %14 = inttoptr i32 %[[M6]] to i8*
+; CHECK-NEXT:  store i8* %14, i8** @goo
+
+ at goo = global i8* undef
+
+define void @foo_i8() {
+entry:
+  %0 = load volatile i8, i8* inttoptr (i32 805874688 to i8*)
+  %1 = load volatile i8, i8* inttoptr (i32 805874692 to i8*)
+  %2 = load volatile i8, i8* inttoptr (i32 805874719 to i8*)
+  %3 = load volatile i8, i8* inttoptr (i32 805874720 to i8*)
+  %4 = load volatile i8, i8* inttoptr (i32 805874727 to i8*)
+  store i8 %4, i8* inttoptr(i32 805873688 to i8*)
+  store i8 %3, i8* inttoptr(i32 805873719 to i8*)
+  store i8 %2, i8* inttoptr(i32 805873720 to i8*)
+  store i8 %1, i8* inttoptr(i32 805873727 to i8*)
+  store i8* inttoptr(i32 805874800 to i8*), i8** @goo
+  ret void
+}
+
+; Check that for i16 type, the maximum legal offset is 62.
+; CHECK: foo_i16
+; CHECK-DAG: %[[C1:const[0-9]?]] = bitcast i32 805874752 to i32
+; CHECK-DAG: %[[C2:const[0-9]?]] = bitcast i32 805874688 to i32
+; CHECK: %0 = inttoptr i32 %[[C2]] to i16*
+; CHECK-NEXT: %1 = load volatile i16, i16* %0, align 2
+; CHECK-NEXT: %[[M1:const_mat[0-9]?]] = add i32 %[[C2]], 4
+; CHECK-NEXT: %2 = inttoptr i32 %[[M1]] to i16*
+; CHECK-NEXT: %3 = load volatile i16, i16* %2, align 2
+; CHECK-NEXT: %[[M2:const_mat[0-9]?]] = add i32 %[[C2]], 32
+; CHECK-NEXT: %4 = inttoptr i32 %[[M2]] to i16*
+; CHECK-NEXT: %5 = load volatile i16, i16* %4, align 2
+; CHECK-NEXT: %[[M3:const_mat[0-9]?]] = add i32 %[[C2]], 62
+; CHECK-NEXT: %6 = inttoptr i32 %[[M3]] to i16*
+; CHECK-NEXT: %7 = load volatile i16, i16* %6, align 2
+; CHECK-NEXT: %8 = inttoptr i32 %[[C1]] to i16*
+; CHECK-NEXT: %9 = load volatile i16, i16* %8, align 2
+; CHECK-NEXT: %[[M4:const_mat[0-9]?]] = add i32 %[[C1]], 22
+; CHECK-NEXT: %10 = inttoptr i32 %[[M4]] to i16*
+; CHECK-NEXT: %11 = load volatile i16, i16* %10, align 2
+
+define void @foo_i16() {
+entry:
+  %0 = load volatile i16, i16* inttoptr (i32 805874688 to i16*), align 2
+  %1 = load volatile i16, i16* inttoptr (i32 805874692 to i16*), align 2
+  %2 = load volatile i16, i16* inttoptr (i32 805874720 to i16*), align 2
+  %3 = load volatile i16, i16* inttoptr (i32 805874750 to i16*), align 2
+  %4 = load volatile i16, i16* inttoptr (i32 805874752 to i16*), align 2
+  %5 = load volatile i16, i16* inttoptr (i32 805874774 to i16*), align 2
+  ret void
+}
+
+; Check that for i32 type, the maximum legal offset is 124.
+; CHECK: foo_i32
+; CHECK-DAG:  %[[C1:const[0-9]?]] = bitcast i32 805874816 to i32
+; CHECK-DAG:  %[[C2:const[0-9]?]] = bitcast i32 805874688 to i32
+; CHECK:  %0 = inttoptr i32 %[[C2]] to i32*
+; CHECK-NEXT:  %1 = load volatile i32, i32* %0, align 4
+; CHECK-NEXT:  %[[M1:const_mat[0-9]?]] = add i32 %[[C2]], 4
+; CHECK-NEXT:  %2 = inttoptr i32 %[[M1]] to i32*
+; CHECK-NEXT:  %3 = load volatile i32, i32* %2, align 4
+; CHECK-NEXT:  %[[M2:const_mat[0-9]?]] = add i32 %[[C2]], 124
+; CHECK-NEXT:  %4 = inttoptr i32 %[[M2]] to i32*
+; CHECK-NEXT:  %5 = load volatile i32, i32* %4, align 4
+; CHECK-NEXT:  %6 = inttoptr i32 %[[C1]] to i32*
+; CHECK-NEXT:  %7 = load volatile i32, i32* %6, align 4
+; CHECK-NEXT:  %[[M3:const_mat[0-9]?]] = add i32 %[[C1]], 8
+; CHECK-NEXT:  %8 = inttoptr i32 %[[M3]] to i32*
+; CHECK-NEXT:  %9 = load volatile i32, i32* %8, align 4
+; CHECK-NEXT:  %[[M4:const_mat[0-9]?]] = add i32 %[[C1]], 12
+; CHECK-NEXT:  %10 = inttoptr i32 %[[M4]] to i32*
+; CHECK-NEXT:  %11 = load volatile i32, i32* %10, align 4
+
+define void @foo_i32() {
+entry:
+  %0 = load volatile i32, i32* inttoptr (i32 805874688 to i32*), align 4
+  %1 = load volatile i32, i32* inttoptr (i32 805874692 to i32*), align 4
+  %2 = load volatile i32, i32* inttoptr (i32 805874812 to i32*), align 4
+  %3 = load volatile i32, i32* inttoptr (i32 805874816 to i32*), align 4
+  %4 = load volatile i32, i32* inttoptr (i32 805874824 to i32*), align 4
+  %5 = load volatile i32, i32* inttoptr (i32 805874828 to i32*), align 4
+  ret void
+}
+
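
The grouping in foo_i8 above is consistent with the stated offset limit:
805874692 and 805874719 are within 31 bytes of 805874688 and become adds off
that base, while the accesses from 805874720 onwards are 32 or more bytes away
and get a second base constant. As background only (the test does not assert
this directly), the limits 31, 62 and 124 match a 5-bit immediate offset
scaled by the access size of 1, 2 or 4 bytes. A minimal sketch of the two
clusters, with an illustrative function name:

  define void @cluster_sketch() {
    %base0 = bitcast i32 805874688 to i32   ; serves offsets 0, 4 and 31
    %base1 = bitcast i32 805874720 to i32   ; 32 bytes further on, so a base of its own
    ret void
  }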

Added: llvm/trunk/test/Transforms/ConstantHoisting/ARM/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True

Added: llvm/trunk/test/Transforms/ConstantHoisting/ARM/same-offset-multi-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/same-offset-multi-types.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/same-offset-multi-types.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/same-offset-multi-types.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt -consthoist -consthoist-gep -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-none--musleabi"
+
+; Check that for the same offset from the base constant, different types are materialized separately.
+; CHECK: %const = bitcast %5** getelementptr inbounds (%0, %0* @global, i32 0, i32 2, i32 0) to %5**
+; CHECK: %tmp = load %5*, %5** %const, align 4
+; CHECK: %base_bitcast = bitcast %5** %const to i8*
+; CHECK: %mat_gep = getelementptr i8, i8* %base_bitcast, i32 0
+; CHECK: %mat_bitcast = bitcast i8* %mat_gep to %4*
+; CHECK: tail call void undef(%5* nonnull %tmp, %4* %mat_bitcast)
+
+%0 = type { [16 x %1], %2, %4, [16 x %5], %6, %7, i32, [4 x i32], [8 x %3], i8, i8, i8, i8, i8, i8, i8, %8, %11, %11*, i32, i16, i8, i8, i8, i8, i8, i8, [15 x i16], i8, i8, [23 x %12], i8, i8*, i8, %13, i8, i8 }
+%1 = type { i32, i32, i8, i8, i8, i8, i8, i8, i8, i8 }
+%2 = type { %3*, i16, i16, i16 }
+%3 = type { [4 x i32] }
+%4 = type { %5*, %5*, i8 }
+%5 = type { [4 x i32], i8*, i8, i8 }
+%6 = type { i8, [4 x i32] }
+%7 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%8 = type { [16 x %9], %9*, %9*, %9*, %9*, %11, %11, %11, i8, i8, i8, i8 }
+%9 = type { %1, %11, %11, %9*, %9*, %10, i8, i8, i8, i8 }
+%10 = type { i32, i16 }
+%11 = type { %11*, %11* }
+%12 = type { i8, i16, i32 }
+%13 = type { i32, i32, i8 }
+
+ at global = external dso_local global %0, align 4
+
+; Function Attrs: nounwind optsize ssp
+define dso_local void @zot() {
+bb:
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp = load %5*, %5** getelementptr inbounds (%0, %0* @global, i32 0, i32 2, i32 0), align 4
+  tail call void undef(%5* nonnull %tmp, %4* getelementptr inbounds (%0, %0* @global, i32 0, i32 2))
+  unreachable
+
+bb2:                                              ; preds = %bb
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%T = type { i32, i32, i32, i32 }
+
+; Test if even cheap base addresses are hoisted.
+define i32 @test1() nounwind {
+; CHECK-LABEL:  @test1
+; CHECK:        %const = bitcast i32 12345678 to i32
+; CHECK:        %1 = inttoptr i32 %const to %T*
+; CHECK:        %addr1 = getelementptr %T, %T* %1, i32 0, i32 1
+  %addr1 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1
+  %tmp1 = load i32, i32* %addr1
+  %addr2 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2
+  %tmp2 = load i32, i32* %addr2
+  %addr3 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3
+  %tmp3 = load i32, i32* %addr3
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'PowerPC' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/masks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/masks.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/masks.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/PowerPC/masks.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,66 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Here the masks are all contiguous, and should not be hoisted.
+define i32 @test1() nounwind {
+entry:
+; CHECK-LABEL:  @test1
+; CHECK-NOT: bitcast i32 65535 to i32
+; CHECK: and i32 undef, 65535
+  %conv121 = and i32 undef, 65535
+  br i1 undef, label %if.then152, label %if.end167
+
+if.then152:
+; CHECK: and i32 undef, 65535
+  %conv153 = and i32 undef, 65535
+  br i1 undef, label %if.end167, label %end2
+
+if.end167:
+; CHECK: and i32 {{.*}}, 32768
+  %shl161 = shl nuw nsw i32 %conv121, 15
+  %0 = load i8, i8* undef, align 1
+  %conv169 = zext i8 %0 to i32
+  %shl170 = shl nuw nsw i32 %conv169, 7
+  %shl161.masked = and i32 %shl161, 32768
+  %conv174 = or i32 %shl170, %shl161.masked
+  %cmp178 = icmp ugt i32 %conv174, 32767
+  br i1 %cmp178, label %end1, label %end2
+
+end1:
+  unreachable
+
+end2:
+  unreachable
+}
+
+; Here the masks are not contiguous, and should be hoisted.
+define i32 @test2() nounwind {
+entry:
+; CHECK-LABEL: @test2
+; CHECK: bitcast i32 65531 to i32
+  %conv121 = and i32 undef, 65531
+  br i1 undef, label %if.then152, label %if.end167
+
+if.then152:
+  %conv153 = and i32 undef, 65531
+  br i1 undef, label %if.end167, label %end2
+
+if.end167:
+; CHECK: add i32 {{.*}}, -32758
+  %shl161 = shl nuw nsw i32 %conv121, 15
+  %0 = load i8, i8* undef, align 1
+  %conv169 = zext i8 %0 to i32
+  %shl170 = shl nuw nsw i32 %conv169, 7
+  %shl161.masked = and i32 %shl161, 32773
+  %conv174 = or i32 %shl170, %shl161.masked
+  %cmp178 = icmp ugt i32 %conv174, 32767
+  br i1 %cmp178, label %end1, label %end2
+
+end1:
+  unreachable
+
+end2:
+  unreachable
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/bad-cases.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/bad-cases.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/bad-cases.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/bad-cases.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -consthoist -S < %s | FileCheck %s
+target triple = "x86_64--"
+
+; We don't want to convert constant divides because the benefit from converting
+; them to a mul in the backend is larger than constant materialization savings.
+define void @signed_const_division(i64 %in1, i64 %in2, i64* %addr) {
+; CHECK-LABEL: @signed_const_division(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[L1:%.*]] = phi i64 [ [[RES1:%.*]], [[LOOP]] ], [ [[IN1:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[L2:%.*]] = phi i64 [ [[RES2:%.*]], [[LOOP]] ], [ [[IN2:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[RES1]] = sdiv i64 [[L1]], 4294967296
+; CHECK-NEXT:    store volatile i64 [[RES1]], i64* [[ADDR:%.*]]
+; CHECK-NEXT:    [[RES2]] = srem i64 [[L2]], 4294967296
+; CHECK-NEXT:    store volatile i64 [[RES2]], i64* [[ADDR]]
+; CHECK-NEXT:    [[AGAIN:%.*]] = icmp eq i64 [[RES1]], [[RES2]]
+; CHECK-NEXT:    br i1 [[AGAIN]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %l1 = phi i64 [%res1, %loop], [%in1, %entry]
+  %l2 = phi i64 [%res2, %loop], [%in2, %entry]
+  %res1 = sdiv i64 %l1, 4294967296
+  store volatile i64 %res1, i64* %addr
+  %res2 = srem i64 %l2, 4294967296
+  store volatile i64 %res2, i64* %addr
+  %again = icmp eq i64 %res1, %res2
+  br i1 %again, label %loop, label %end
+
+end:
+  ret void
+}
+
+define void @unsigned_const_division(i64 %in1, i64 %in2, i64* %addr) {
+; CHECK-LABEL: @unsigned_const_division(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[L1:%.*]] = phi i64 [ [[RES1:%.*]], [[LOOP]] ], [ [[IN1:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[L2:%.*]] = phi i64 [ [[RES2:%.*]], [[LOOP]] ], [ [[IN2:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[RES1]] = udiv i64 [[L1]], 4294967296
+; CHECK-NEXT:    store volatile i64 [[RES1]], i64* [[ADDR:%.*]]
+; CHECK-NEXT:    [[RES2]] = urem i64 [[L2]], 4294967296
+; CHECK-NEXT:    store volatile i64 [[RES2]], i64* [[ADDR]]
+; CHECK-NEXT:    [[AGAIN:%.*]] = icmp eq i64 [[RES1]], [[RES2]]
+; CHECK-NEXT:    br i1 [[AGAIN]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+
+entry:
+  br label %loop
+
+loop:
+  %l1 = phi i64 [%res1, %loop], [%in1, %entry]
+  %l2 = phi i64 [%res2, %loop], [%in2, %entry]
+  %res1 = udiv i64 %l1, 4294967296
+  store volatile i64 %res1, i64* %addr
+  %res2 = urem i64 %l2, 4294967296
+  store volatile i64 %res2, i64* %addr
+  %again = icmp eq i64 %res1, %res2
+  br i1 %again, label %loop, label %end
+
+end:
+  ret void
+}
+
+define i32 @PR40934() {
+; CHECK-LABEL: @PR40934(
+; CHECK-NEXT:    ret i32 undef
+; CHECK:       bb:
+; CHECK-NEXT:    [[T2:%.*]] = call i32 (i64, ...) bitcast (i32 (...)* @d to i32 (i64, ...)*)(i64 7788015061)
+; CHECK-NEXT:    [[T3:%.*]] = and i64 [[T3]], 7788015061
+; CHECK-NEXT:    br label [[BB:%.*]]
+;
+  ret i32 undef
+
+bb:
+  %t2 = call i32 (i64, ...) bitcast (i32 (...)* @d to i32 (i64, ...)*)(i64 7788015061)
+  %t3 = and i64 %t3, 7788015061
+  br label %bb
+}
+
+declare i32 @d(...)
+
+define i32 @PR40930() {
+; CHECK-LABEL: @PR40930(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 (i64, i64, ...) bitcast (i32 (...)* @c to i32 (i64, i64, ...)*)(i64 4208870971, i64 4208870971)
+; CHECK-NEXT:    br label [[BB1]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+bb:
+  %tmp = alloca i32, align 4
+  br label %bb1
+
+bb1:                                              ; preds = %bb3, %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb2, %bb1
+  br label %bb2
+
+bb3:                                              ; No predecessors!
+  %tmp4 = call i32 (i64, i64, ...) bitcast (i32 (...)* @c to i32 (i64, i64, ...)*)(i64 4208870971, i64 4208870971)
+  br label %bb1
+
+bb5:                                              ; No predecessors!
+  %tmp6 = load i32, i32* %tmp, align 4
+  ret i32 %tmp6
+}
+
+declare i32 @c(...)

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/cast-inst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/cast-inst.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/cast-inst.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/cast-inst.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+; RUN: opt -S -passes='consthoist' < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Check if the materialization of the constant and the cast instruction are
+; inserted in the correct order.
+define i32 @cast_inst_test() {
+; CHECK-LABEL:  @cast_inst_test
+; CHECK:        %const = bitcast i64 4646526064 to i64
+; CHECK:        %1 = inttoptr i64 %const to i32*
+; CHECK:        %v0 = load i32, i32* %1, align 16
+; CHECK:        %const_mat = add i64 %const, 16
+; CHECK-NEXT:   %2 = inttoptr i64 %const_mat to i32*
+; CHECK-NEXT:   %v1 = load i32, i32* %2, align 16
+; CHECK:        %const_mat1 = add i64 %const, 32
+; CHECK-NEXT:   %3 = inttoptr i64 %const_mat1 to i32*
+; CHECK-NEXT:   %v2 = load i32, i32* %3, align 16
+  %a0 = inttoptr i64 4646526064 to i32*
+  %v0 = load i32, i32* %a0, align 16
+  %a1 = inttoptr i64 4646526080 to i32*
+  %v1 = load i32, i32* %a1, align 16
+  %a2 = inttoptr i64 4646526096 to i32*
+  %v2 = load i32, i32* %a2, align 16
+  %r0 = add i32 %v0, %v1
+  %r1 = add i32 %r0, %v2
+  ret i32 %r1
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/const-base-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/const-base-addr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/const-base-addr.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/const-base-addr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%T = type { i32, i32, i32, i32 }
+
+; Test if even cheap base addresses are hoisted.
+define i32 @test1() nounwind {
+; CHECK-LABEL:  @test1
+; CHECK:        %const = bitcast i32 12345678 to i32
+; CHECK:        %1 = inttoptr i32 %const to %T*
+; CHECK:        %addr1 = getelementptr %T, %T* %1, i32 0, i32 1
+  %addr1 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1
+  %tmp1 = load i32, i32* %addr1
+  %addr2 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2
+  %tmp2 = load i32, i32* %addr2
+  %addr3 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3
+  %tmp3 = load i32, i32* %addr3
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/dbg-dominatingblock.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/dbg-dominatingblock.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/dbg-dominatingblock.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/dbg-dominatingblock.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+; ModuleID = 'test-hoist-debug.cpp'
+source_filename = "test-hoist-debug.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind optnone uwtable
+define i32 @_Z3foov() !dbg !7 {
+; CHECK: bitcast
+; CHECK-NOT: !dbg !11
+; CHECK: inttoptr 
+entry:
+  %a0 = inttoptr i64 4646526064 to i32*
+  %v0 = load i32, i32* %a0, align 16, !dbg !11
+  %c = alloca i32, align 4
+  store i32 1, i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %a1 = inttoptr i64 4646526080 to i32*
+  %v1 = load i32, i32* %a1, align 16, !dbg !11
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %a2 = inttoptr i64 4646526096 to i32*
+  %v2 = load i32, i32* %a2, align 16, !dbg !11
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %vx = phi i32 [%v1, %if.then], [%v2, %if.else]
+  %r0 = add i32 %vx, %v0
+
+  ret i32 %r0
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 313291)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "test-hoist-debug.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 6.0.0 (trunk 313291)"}
+!7 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocation(line: 2, column: 3, scope: !7)
+!12 = !DILocation(line: 3, column: 3, scope: !7)
+!13 = !DILocation(line: 4, column: 3, scope: !7)

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/dbg-samebasicblock.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/dbg-samebasicblock.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/dbg-samebasicblock.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/dbg-samebasicblock.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+; ModuleID = 'test-hoist-debug.cpp'
+source_filename = "test-hoist-debug.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind optnone uwtable
+define i32 @_Z3foov() !dbg !7 {
+; CHECK: bitcast
+; CHECK: !dbg !11
+; CHECK: inttoptr 
+  %a0 = inttoptr i64 4646526064 to i32*, !dbg !11
+  %v0 = load i32, i32* %a0, align 16, !dbg !11
+
+  %a1 = inttoptr i64 4646526080 to i32*
+  %v1 = load i32, i32* %a1, align 16, !dbg !11
+
+  %a2 = inttoptr i64 4646526096 to i32*
+  %v2 = load i32, i32* %a2, align 16, !dbg !11
+
+  %r0 = add i32 %v0, %v1
+  %r1 = add i32 %r0, %v2
+  ret i32 %r1
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 313291)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "test-hoist-debug.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 6.0.0 (trunk 313291)"}
+!7 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocation(line: 2, column: 3, scope: !7)

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/delete-dead-cast-inst.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%T = type { i32, i32, i32, i32 }
+
+define i32 @test1() nounwind {
+; CHECK-LABEL:  @test1
+; CHECK:        %const = bitcast i32 12345678 to i32
+; CHECK-NOT:    %base = inttoptr i32 12345678 to %T*
+; CHECK-NEXT:   %1 = inttoptr i32 %const to %T*
+; CHECK-NEXT:   %addr1 = getelementptr %T, %T* %1, i32 0, i32 1
+; CHECK-NEXT:   %addr2 = getelementptr %T, %T* %1, i32 0, i32 2
+; CHECK-NEXT:   %addr3 = getelementptr %T, %T* %1, i32 0, i32 3
+  %base = inttoptr i32 12345678 to %T*
+  %addr1 = getelementptr %T, %T* %base, i32 0, i32 1
+  %addr2 = getelementptr %T, %T* %base, i32 0, i32 2
+  %addr3 = getelementptr %T, %T* %base, i32 0, i32 3
+  ret i32 12345678
+}
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/ehpad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/ehpad.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/ehpad.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/ehpad.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,70 @@
+; RUN: opt -S -consthoist -consthoist-with-block-frequency=false < %s | FileCheck %s
+; RUN: opt -S -consthoist -consthoist-with-block-frequency=true < %s | FileCheck --check-prefix=BFIHOIST %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; CHECK-LABEL: define i32 @main
+; CHECK: %tobool = icmp eq i32 %argc, 0
+; CHECK-NEXT: bitcast i64 9209618997431186100 to i64
+; CHECK-NEXT: br i1 %tobool
+
+; BFIHOIST-LABEL: define i32 @main
+; BFIHOIST: then:
+; BFIHOIST: %[[CONST1:.*]] = bitcast i64 9209618997431186100 to i64
+; BFIHOIST: %add = add i64 %call4, %[[CONST1]]
+; BFIHOIST: br label %endif
+; BFIHOIST: else:
+; BFIHOIST: %[[CONST2:.*]] = bitcast i64 9209618997431186100 to i64
+; BFIHOIST: %add6 = add i64 %call5, %[[CONST2]]
+; BFIHOIST: br label %endif
+
+; Function Attrs: norecurse
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+  %call = tail call i64 @fn(i64 0)
+  %call1 = tail call i64 @fn(i64 1)
+  %tobool = icmp eq i32 %argc, 0
+  br i1 %tobool, label %2, label %1
+
+; <label>:1:                                      ; preds = %0
+  %call2 = invoke i64 @fn(i64 %call)
+          to label %6 unwind label %catch.dispatch
+
+; <label>:2:                                      ; preds = %0
+  %call3 = invoke i64 @fn(i64 %call1)
+          to label %6 unwind label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %2, %1
+  %z.0 = phi i64 [ %call, %1 ], [ %call1, %2 ]
+  %3 = catchswitch within none [label %4] unwind to caller
+
+; <label>:4:                                      ; preds = %catch.dispatch
+  %5 = catchpad within %3 [i8* null, i32 64, i8* null]
+  br i1 %tobool, label %then, label %else
+
+then:
+  %call4 = tail call i64 @fn(i64 %z.0) [ "funclet"(token %5) ]
+  %add = add i64 %call4, 9209618997431186100
+  br label %endif
+
+else:
+  %call5 = tail call i64 @fn(i64 0) [ "funclet"(token %5) ]
+  %add6 = add i64 %call5, 9209618997431186100
+  br label %endif
+
+endif:
+  %v = phi i64 [ %add, %then ], [ %add6, %else ]
+  %call7 = tail call i64 @fn(i64 %v) [ "funclet"(token %5) ]
+  %call8 = tail call i64 @fn(i64 %call7) [ "funclet"(token %5) ]
+  catchret from %5 to label %6
+
+; <label>:6:                                      ; preds = %1, %2, %4
+  ret i32 0
+}
+
+declare i64 @fn(i64) local_unnamed_addr #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+attributes #0 = { norecurse "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/large-immediate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/large-immediate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/large-immediate.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/large-immediate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt -mtriple=x86_64-darwin-unknown -S -consthoist < %s | FileCheck %s
+
+define i128 @test1(i128 %a) nounwind {
+; CHECK-LABEL: test1
+; CHECK: %const = bitcast i128 12297829382473034410122878 to i128
+  %1 = add i128 %a, 12297829382473034410122878
+  %2 = add i128 %1, 12297829382473034410122878
+  ret i128 %2
+}
+
+; Check that we don't hoist the shift amount of a shift instruction.
+define i512 @test2(i512 %a) nounwind {
+; CHECK-LABEL: test2
+; CHECK-NOT: %const = bitcast i512 504 to i512
+  %1 = shl i512 %a, 504
+  %2 = ashr i512 %1, 504
+  ret i512 %2
+}
+
+; Check that we don't hoist constants with a type larger than i128.
+define i196 @test3(i196 %a) nounwind {
+; CHECK-LABEL: test3
+; CHECK-NOT: %const = bitcast i196 2 to i196
+  %1 = mul i196 %a, 2
+  %2 = mul i196 %1, 2
+  ret i196 %2
+}
+
+; Check that we don't hoist immediates with small values.
+define i96 @test4(i96 %a) nounwind {
+; CHECK-LABEL: test4
+; CHECK-NOT: %const = bitcast i96 2 to i96
+  %1 = mul i96 %a, 2
+  %2 = add i96 %1, 2
+  ret i96 %2
+}

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/phi.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,117 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+; RUN: opt -S -passes=consthoist < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; PR18626
+define i8* @test1(i1 %cmp, i64* %tmp) {
+entry:
+  call void @foo(i8* inttoptr (i64 68719476735 to i8*))
+  br i1 %cmp, label %if.end, label %return
+
+if.end:                                           ; preds = %bb1
+  call void @foo(i8* inttoptr (i64 68719476736 to i8*))
+  br label %return
+
+return:
+  %retval.0 = phi i8* [ null, %entry ], [ inttoptr (i64 68719476736 to i8*), %if.end ]
+  store i64 1172321806, i64* %tmp
+  ret i8* %retval.0
+
+; CHECK-LABEL: @test1
+; CHECK: if.end:
+; CHECK: %2 = inttoptr i64 %const to i8*
+; CHECK-NEXT: br
+; CHECK: return:
+; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ %2, %if.end ]
+}
+
+define void @test2(i1 %cmp, i64** %tmp) {
+entry:
+  call void @foo(i8* inttoptr (i64 68719476736 to i8*))
+  br i1 %cmp, label %if.end, label %return
+
+if.end:                                           ; preds = %bb1
+  call void @foo(i8* inttoptr (i64 68719476736 to i8*))
+  br label %return
+
+return:
+  store i64* inttoptr (i64 68719476735 to i64*), i64** %tmp
+  ret void
+
+; CHECK-LABEL: @test2
+; CHECK: return:
+; CHECK-NEXT: %const_mat = add i64 %const, -1
+; CHECK-NEXT: inttoptr i64 %const_mat to i64*
+}
+
+declare void @foo(i8*)
+
+; PR18768
+define i32 @test3(i1 %c) {
+entry:
+  br i1 %c, label %if.then, label %if.end3
+
+if.then:                                          ; preds = %entry
+  br label %if.end3
+
+if.end3:                                          ; preds = %if.then, %entry
+  %d.0 = phi i32* [ inttoptr (i64 985162435264511 to i32*), %entry ], [ null, %if.then ]
+  %cmp4 = icmp eq i32* %d.0, inttoptr (i64 985162435264511 to i32*)
+  %cmp6 = icmp eq i32* %d.0, inttoptr (i64 985162418487296 to i32*)
+  %or = or i1 %cmp4, %cmp6
+  br i1 %or, label %if.then8, label %if.end9
+
+if.then8:                                         ; preds = %if.end3
+  ret i32 1
+
+if.end9:                                          ; preds = %if.then8, %if.end3
+  ret i32 undef
+}
+
+; <rdar://problem/16394449>
+define i64 @switch_test1(i64 %a) {
+; CHECK-LABEL: @switch_test1
+; CHECK: %0 = phi i64 [ %const, %case2 ], [ %const_mat, %Entry ], [ %const_mat, %Entry ]
+Entry:
+  %sel = add i64 %a, 4519019440
+  switch i64 %sel, label %fail [
+    i64 462, label %continuation
+    i64 449, label %case2
+    i64 443, label %continuation
+  ]
+
+case2:
+  br label %continuation
+
+continuation:
+  %0 = phi i64 [ 4519019440, %case2 ], [ 4519019460, %Entry ], [ 4519019460, %Entry ]
+  ret i64 0;
+
+fail:
+  ret i64 -1;
+}
+
+define i64 @switch_test2(i64 %a) {
+; CHECK-LABEL: @switch_test2
+; CHECK: %2 = phi i64* [ %1, %case2 ], [ %0, %Entry ], [ %0, %Entry ]
+Entry:
+  %sel = add i64 %a, 4519019440
+  switch i64 %sel, label %fail [
+    i64 462, label %continuation
+    i64 449, label %case2
+    i64 443, label %continuation
+  ]
+
+case2:
+  br label %continuation
+
+continuation:
+  %0 = phi i64* [ inttoptr(i64 4519019440 to i64*), %case2 ], [ inttoptr(i64 4519019460 to i64*), %Entry ], [ inttoptr(i64 4519019460 to i64*), %Entry ]
+  ret i64 0;
+
+fail:
+  ret i64 -1;
+}
+
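
A recurring subtlety in the phi tests above: when a rewritten constant feeds a
phi, the cast of the hoisted constant has to be available at the end of the
corresponding predecessor block, because a phi's incoming value is evaluated
on the edge rather than at the phi itself. A minimal sketch of the shape the
checks describe, with illustrative names:

  define i8* @phi_mat_sketch(i1 %cmp) {
  entry:
    %const = bitcast i64 68719476736 to i64
    br i1 %cmp, label %if.end, label %return

  if.end:
    %p = inttoptr i64 %const to i8*         ; materialized in the predecessor ...
    br label %return

  return:
    %r = phi i8* [ null, %entry ], [ %p, %if.end ]   ; ... so the phi can take an SSA value
    ret i8* %r
  }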

Added: llvm/trunk/test/Transforms/ConstantHoisting/X86/stackmap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/X86/stackmap.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/X86/stackmap.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/X86/stackmap.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,17 @@
+; RUN: opt -S -consthoist < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Test if the 3rd argument of a stackmap is hoisted.
+define i128 @test1(i128 %a) {
+; CHECK-LABEL:  @test1
+; CHECK:        %const = bitcast i128 134646182756734033220 to i128
+; CHECK:        tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 24, i128 %const)
+entry:
+  %0 = add i128 %a, 134646182756734033220
+  tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 24, i128 134646182756734033220)
+  ret i128 %0
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)

Added: llvm/trunk/test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll (added)
+++ llvm/trunk/test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,13 @@
+; RUN: opt < %s -constmerge > /dev/null
+
+ at foo.upgrd.1 = internal constant { i32 } { i32 7 }              ; <{ i32 }*> [#uses=1]
+ at bar = internal constant { i32 } { i32 7 }              ; <{ i32 }*> [#uses=1]
+
+declare i32 @test(i32*)
+
+define void @foo() {
+        call i32 @test( i32* getelementptr ({ i32 }, { i32 }* @foo.upgrd.1, i64 0, i32 0) )              ; <i32>:1 [#uses=0]
+        call i32 @test( i32* getelementptr ({ i32 }, { i32 }* @bar, i64 0, i32 0) )              ; <i32>:2 [#uses=0]
+        ret void
+}
+

Added: llvm/trunk/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll (added)
+++ llvm/trunk/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,7 @@
+; RUN: opt -S -constmerge < %s | FileCheck %s
+
+; CHECK: @foo = constant i32 6
+; CHECK: @bar = constant i32 6
+ at foo = constant i32 6           ; <i32*> [#uses=0]
+ at bar = constant i32 6           ; <i32*> [#uses=0]
+

Added: llvm/trunk/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll (added)
+++ llvm/trunk/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt -constmerge -S < %s | FileCheck %s
+; PR8978
+
+declare i32 @zed(%struct.foobar*, %struct.foobar*)
+
+%struct.foobar = type { i32 }
+; CHECK: bar.d
+ at bar.d =  unnamed_addr constant %struct.foobar zeroinitializer, align 4
+; CHECK-NOT: foo.d
+ at foo.d = internal constant %struct.foobar zeroinitializer, align 4
+define i32 @main() nounwind ssp {
+entry:
+; CHECK: bar.d
+  %call2 = tail call i32 @zed(%struct.foobar* @foo.d, %struct.foobar* @bar.d) nounwind
+  ret i32 0
+}
+

Added: llvm/trunk/test/Transforms/ConstantMerge/align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantMerge/align.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantMerge/align.ll (added)
+++ llvm/trunk/test/Transforms/ConstantMerge/align.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt -constmerge -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+
+; Test that with a datalayout we do merge and mark the alignment as 4.
+ at T1A = internal unnamed_addr constant i32 1
+ at T1B = internal unnamed_addr constant i32 1, align 2
+; CHECK: @T1B = internal unnamed_addr constant i32 1, align 4
+
+define void @test1(i32** %P1, i32** %P2) {
+  store i32* @T1A, i32** %P1
+  store i32* @T1B, i32** %P2
+  ret void
+}
+
+
+; Test that even with a datalayout we set the alignment to the maximum if both
+; constants have explicit alignments.
+ at T2A = internal unnamed_addr constant i32 2, align 1
+ at T2B = internal unnamed_addr constant i32 2, align 2
+; CHECK: @T2B = internal unnamed_addr constant i32 2, align 2
+
+define void @test2(i32** %P1, i32** %P2) {
+  store i32* @T2A, i32** %P1
+  store i32* @T2B, i32** %P2
+  ret void
+}
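
Both alignment cases above follow the same rule: constmerge keeps a single
global and gives it the larger of the two effective alignments, where a
missing explicit alignment counts as the ABI alignment from the datalayout
(4 for i32 here). A minimal before/after sketch with illustrative names,
mirroring the first case:

  ; before -constmerge
  @a = internal unnamed_addr constant i32 1
  @b = internal unnamed_addr constant i32 1, align 2

  ; after: one global survives with align 4, and former uses of the
  ; merged-away global are redirected to it
  @b = internal unnamed_addr constant i32 1, align 4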

Added: llvm/trunk/test/Transforms/ConstantMerge/dont-merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantMerge/dont-merge.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantMerge/dont-merge.ll (added)
+++ llvm/trunk/test/Transforms/ConstantMerge/dont-merge.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,82 @@
+; RUN: opt < %s -constmerge -S | FileCheck %s
+
+; Don't merge constants with specified sections.
+
+ at T1G1 = internal constant i32 1, section "foo"
+ at T1G2 = internal constant i32 1, section "bar"
+ at T1G3 = internal constant i32 1, section "bar"
+
+; CHECK: @T1G1
+; CHECK: @T1G2
+; CHECK: @T1G3
+
+define void @test1(i32** %P1, i32** %P2, i32** %P3) {
+        store i32* @T1G1, i32** %P1
+        store i32* @T1G2, i32** %P2
+        store i32* @T1G3, i32** %P3
+        ret void
+}
+
+ at T2a = internal constant i32 224
+ at T2b = internal addrspace(30) constant i32 224
+
+; CHECK: @T2a
+; CHECK: @T2b
+
+define void @test2(i32** %P1, i32 addrspace(30)** %P2) {
+        store i32* @T2a, i32** %P1
+        store i32 addrspace(30)*  @T2b, i32 addrspace(30)** %P2
+        ret void
+}
+
+; PR8144 - Don't merge globals marked attribute(used)
+; CHECK: @T3A = 
+; CHECK: @T3B = 
+
+ at T3A = internal constant i32 0
+ at T3B = internal constant i32 0
+ at llvm.used = appending global [2 x i32*] [i32* @T3A, i32* @T3B], section "llvm.metadata"
+
+define void @test3() {
+  call void asm sideeffect "T3A, T3B",""() ; invisible use of T3A and T3B
+  ret void
+}
+
+; Don't merge constants with !type annotations.
+
+ at T4A1 = internal constant i32 2, !type !0
+ at T4A2 = internal unnamed_addr constant i32 2, !type !1
+
+ at T4B1 = internal constant i32 3, !type !0
+ at T4B2 = internal unnamed_addr constant i32 3, !type !0
+
+ at T4C1 = internal constant i32 4, !type !0
+ at T4C2 = unnamed_addr constant i32 4
+
+ at T4D1 = unnamed_addr constant i32 5, !type !0
+ at T4D2 = internal constant i32 5
+
+!0 = !{i64 0, !"typeinfo name for A"}
+!1 = !{i64 0, !"typeinfo name for B"}
+
+; CHECK: @T4A1
+; CHECK: @T4A2
+; CHECK: @T4B1
+; CHECK: @T4B2
+; CHECK: @T4C1
+; CHECK: @T4C2
+; CHECK: @T4D1
+; CHECK: @T4D2
+
+define void @test4(i32** %P1, i32** %P2, i32** %P3, i32** %P4, i32** %P5, i32** %P6, i32** %P7, i32** %P8) {
+        store i32* @T4A1, i32** %P1
+        store i32* @T4A2, i32** %P2
+        store i32* @T4B1, i32** %P3
+        store i32* @T4B2, i32** %P4
+        store i32* @T4C1, i32** %P5
+        store i32* @T4C2, i32** %P6
+        store i32* @T4D1, i32** %P7
+        store i32* @T4D2, i32** %P8
+        ret void
+}

Added: llvm/trunk/test/Transforms/ConstantMerge/merge-both.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantMerge/merge-both.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantMerge/merge-both.ll (added)
+++ llvm/trunk/test/Transforms/ConstantMerge/merge-both.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; RUN: opt -S < %s -passes=constmerge | FileCheck %s
+; Test that in one run var3 is merged into var2 and var1 into var4.
+; Test that we merge @var5 and @var6 into one with the higher alignment
+
+declare void @zed(%struct.foobar*, %struct.foobar*)
+
+%struct.foobar = type { i32 }
+
+@var1 = internal constant %struct.foobar { i32 2 }
+@var2 = unnamed_addr constant %struct.foobar { i32 2 }
+@var3 = internal constant %struct.foobar { i32 2 }
+@var4 = unnamed_addr constant %struct.foobar { i32 2 }
+
+; CHECK:      %struct.foobar = type { i32 }
+; CHECK-NOT: @
+; CHECK: @var2 = constant %struct.foobar { i32 2 }
+; CHECK-NEXT: @var4 = constant %struct.foobar { i32 2 }
+
+declare void @helper([16 x i8]*)
+@var5 = internal constant [16 x i8] c"foo1bar2foo3bar\00", align 16
+@var6 = private unnamed_addr constant [16 x i8] c"foo1bar2foo3bar\00", align 1
+@var7 = internal constant [16 x i8] c"foo1bar2foo3bar\00"
+@var8 = private unnamed_addr constant [16 x i8] c"foo1bar2foo3bar\00"
+
+; CHECK-NEXT: @var7 = internal constant [16 x i8] c"foo1bar2foo3bar\00"
+; CHECK-NEXT: @var8 = private constant [16 x i8] c"foo1bar2foo3bar\00", align 16
+
+@var4a = alias %struct.foobar, %struct.foobar* @var4
+@llvm.used = appending global [1 x %struct.foobar*] [%struct.foobar* @var4a], section "llvm.metadata"
+
+define i32 @main() {
+entry:
+  call void @zed(%struct.foobar* @var1, %struct.foobar* @var2)
+  call void @zed(%struct.foobar* @var3, %struct.foobar* @var4)
+  call void @helper([16 x i8]* @var5)
+  call void @helper([16 x i8]* @var6)
+  call void @helper([16 x i8]* @var7)
+  call void @helper([16 x i8]* @var8)
+  ret i32 0
+}
+

Added: llvm/trunk/test/Transforms/ConstantMerge/merge-dbg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantMerge/merge-dbg.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantMerge/merge-dbg.ll (added)
+++ llvm/trunk/test/Transforms/ConstantMerge/merge-dbg.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt < %s -constmerge -S | FileCheck %s
+
+; CHECK: = constant i32 1, !dbg [[A:![0-9]+]], !dbg [[B:![0-9]+]]
+@a = internal constant i32 1, !dbg !0
+@b = unnamed_addr constant i32 1, !dbg !9
+
+define void @test1(i32** %P1, i32** %P2) {
+  store i32* @a, i32** %P1
+  store i32* @b, i32** %P2
+  ret void
+}
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8}
+
+; CHECK: [[A]] = !DIGlobalVariableExpression(var: [[VA:![0-9]+]], expr: !DIExpression())
+; CHECK: [[VA]] = distinct !DIGlobalVariable(name: "y"
+; CHECK: [[B]] = !DIGlobalVariableExpression(var: [[VB:![0-9]+]], expr: !DIExpression())
+; CHECK: [[VB]] = distinct !DIGlobalVariable(name: "x"
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 5.0.0 (trunk 297227) (llvm/trunk 297234)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
+!3 = !DIFile(filename: "1.cc", directory: "/build")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+
+!9 = !DIGlobalVariableExpression(var: !10, expr: !DIExpression())
+!10 = distinct !DIGlobalVariable(name: "y", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)

Added: llvm/trunk/test/Transforms/ConstantMerge/unnamed-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantMerge/unnamed-addr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantMerge/unnamed-addr.ll (added)
+++ llvm/trunk/test/Transforms/ConstantMerge/unnamed-addr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -constmerge -S < %s | FileCheck %s
+; Test which of the corresponding x and y globals get merged and that
+; unnamed_addr is correctly set on the result.
+
+declare void @zed(%struct.foobar*, %struct.foobar*)
+
+%struct.foobar = type { i32 }
+
+@test1.x = internal constant %struct.foobar { i32 1 }
+@test1.y = constant %struct.foobar { i32 1 }
+
+@test2.x = internal constant %struct.foobar { i32 2 }
+@test2.y = unnamed_addr constant %struct.foobar { i32 2 }
+
+@test3.x = internal unnamed_addr constant %struct.foobar { i32 3 }
+@test3.y = constant %struct.foobar { i32 3 }
+
+@test4.x = internal unnamed_addr constant %struct.foobar { i32 4 }
+@test4.y = unnamed_addr constant %struct.foobar { i32 4 }
+
+
+; CHECK:      %struct.foobar = type { i32 }
+; CHECK-NOT: @
+; CHECK: @test1.x = internal constant %struct.foobar { i32 1 }
+; CHECK-NEXT: @test1.y = constant %struct.foobar { i32 1 }
+; CHECK-NEXT: @test2.y = constant %struct.foobar { i32 2 }
+; CHECK-NEXT: @test3.y = constant %struct.foobar { i32 3 }
+; CHECK-NEXT: @test4.y = unnamed_addr constant %struct.foobar { i32 4 }
+; CHECK-NOT: @
+; CHECK: declare void @zed(%struct.foobar*, %struct.foobar*)
+
+define i32 @main() {
+entry:
+  call void @zed(%struct.foobar* @test1.x, %struct.foobar* @test1.y)
+  call void @zed(%struct.foobar* @test2.x, %struct.foobar* @test2.y)
+  call void @zed(%struct.foobar* @test3.x, %struct.foobar* @test3.y)
+  call void @zed(%struct.foobar* @test4.x, %struct.foobar* @test4.y)
+  ret i32 0
+}
+
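
Read together, the CHECK lines above amount to a rule of thumb one can take from this
test: a pair is only merged when at least one of the two constants is unnamed_addr,
the internal copy is the one folded away, and the survivor keeps unnamed_addr only
when both inputs had it. For the @test2 pair that works out to

  @test2.y = constant %struct.foobar { i32 2 }   ; @test2.x folded in; unnamed_addr dropped

since @test2.x did not carry unnamed_addr.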

Added: llvm/trunk/test/Transforms/Coroutines/ArgAddr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/ArgAddr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/ArgAddr.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/ArgAddr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; Need to move users of allocas that were moved into the coroutine frame after
+; coro.begin.
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define nonnull i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null);
+  %n.addr = alloca i32
+  store i32 %n, i32* %n.addr ; this needs to go after coro.begin
+  %0 = tail call i32 @llvm.coro.size.i32()
+  %call = tail call i8* @malloc(i32 %0)
+  %1 = tail call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %call)
+  %2 = bitcast i32* %n.addr to i8*
+  call void @ctor(i8* %2)
+  br label %for.cond
+
+for.cond:
+  %3 = load i32, i32* %n.addr
+  %dec = add nsw i32 %3, -1
+  store i32 %dec, i32* %n.addr
+  call void @print(i32 %3)
+  %4 = call i8 @llvm.coro.suspend(token none, i1 false)
+  %conv = sext i8 %4 to i32
+  switch i32 %conv, label %coro_Suspend [
+    i32 0, label %for.cond
+    i32 1, label %coro_Cleanup
+  ]
+
+coro_Cleanup:
+  %5 = call i8* @llvm.coro.free(token %id, i8* nonnull %1)
+  call void @free(i8* %5)
+  br label %coro_Suspend
+
+coro_Suspend:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret i8* %1
+}
+
+; CHECK-LABEL: @main
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+; CHECK:      call void @ctor
+; CHECK-NEXT: call void @print(i32 4)
+; CHECK-NEXT: call void @print(i32 3)
+; CHECK-NEXT: call void @print(i32 2)
+; CHECK:      ret i32 0
+}
+
+declare i8* @malloc(i32)
+declare void @free(i8*)
+declare void @print(i32)
+declare void @ctor(i8* nocapture readonly)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
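
Conceptually, the fix being tested is that once %n.addr is given a slot in the
coroutine frame, the store flagged "this needs to go after coro.begin" is sunk past
the frame allocation. A rough sketch of the intermediate IR (the frame field index
and value names here are assumed; the test itself only checks the fully inlined
-O2 output through the @print calls):

  %hdl = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %call)
  %frame = bitcast i8* %hdl to %f.Frame*
  %n.slot = getelementptr inbounds %f.Frame, %f.Frame* %frame, i32 0, i32 4
  store i32 %n, i32* %n.slot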

Added: llvm/trunk/test/Transforms/Coroutines/coro-catchswitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-catchswitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-catchswitch.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-catchswitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,88 @@
+; Verifies that we can insert the spill for a PHI preceding the catchswitch
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+; CHECK-LABEL: define void @f(
+define void @f(i1 %cond) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 8, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br i1 %cond, label %if.else, label %if.then
+
+if.then:
+  invoke void @may_throw1()
+          to label %coro.ret unwind label %catch.dispatch
+
+if.else:
+  invoke void @may_throw2()
+          to label %coro.ret unwind label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %if.else, %if.then
+  %val = phi i32 [ 1, %if.then ], [ 2, %if.else ]
+  %switch = catchswitch within none [label %catch] unwind label %cleanuppad
+
+; Verifies that we split out the PHI into a separate block and added a
+; cleanuppad + spill + cleanupret sequence unwinding into the catchswitch.
+
+; CHECK: catch.dispatch:
+; CHECK:  %val = phi i32 [ 2, %if.else ], [ 1, %if.then ]
+; CHECK:  %[[Pad:.+]] = cleanuppad within none []
+; CHECK:  %val.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK:  store i32 %val, i32* %val.spill.addr
+; CHECK:  cleanupret from %[[Pad]] unwind label %[[Switch:.+]]
+
+; CHECK: [[Switch]]:
+; CHECK: %switch = catchswitch within none [label %catch] unwind to caller
+
+catch:                                            ; preds = %catch.dispatch
+  %pad = catchpad within %switch [i8* null, i32 64, i8* null]
+  catchret from %pad to label %suspend
+
+suspend:
+  %sp = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp, label %coro.ret [
+    i8 0, label %resume
+    i8 1, label %coro.ret
+  ]
+
+resume:                                   ; preds = %await2.suspend
+  call void @print(i32 %val)
+  br label %coro.ret
+
+coro.ret:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+    ret void
+
+cleanuppad:
+  %cpad = cleanuppad within none []
+  cleanupret from %cpad unwind to caller
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
+
+; Function Attrs: nounwind
+declare i1 @llvm.coro.alloc(token) #2
+
+; Function Attrs: nobuiltin
+declare i32 @llvm.coro.size.i32() #4
+declare i8* @llvm.coro.begin(token, i8* writeonly) #2
+declare token @llvm.coro.save(i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+
+declare void @may_throw1()
+declare void @may_throw2()
+declare void @print(i32)
+declare noalias i8* @malloc(i32)
+declare void @free(i8*)
+
+declare i1 @llvm.coro.end(i8*, i1) #2
+
+; Function Attrs: nobuiltin nounwind
+
+; Function Attrs: argmemonly nounwind readonly
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1

Added: llvm/trunk/test/Transforms/Coroutines/coro-cleanup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-cleanup.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-cleanup.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-cleanup.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; Make sure that all library helper coro intrinsics are lowered.
+; RUN: opt < %s -O0 -enable-coroutines -S | FileCheck %s
+
+; CHECK-LABEL: @uses_library_support_coro_intrinsics(
+; CHECK-NOT:     @llvm.coro
+; CHECK:         ret void
+define void @uses_library_support_coro_intrinsics(i8* %hdl) {
+entry:
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+  call i1 @llvm.coro.done(i8* %hdl)
+  ret void
+}
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+declare i1 @llvm.coro.done(i8*)
+

Added: llvm/trunk/test/Transforms/Coroutines/coro-debug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-debug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-debug.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-debug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,142 @@
+; Tests that debug information is sane after coro-split
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+source_filename = "simple-repro.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind
+define i8* @f(i32 %x) #0 !dbg !6 {
+entry:
+  %x.addr = alloca i32, align 4
+  %coro_hdl = alloca i8*, align 8
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata i8** %coro_hdl, metadata !15, metadata !13), !dbg !16
+  %0 = call token @llvm.coro.id(i32 0, i8* null, i8* bitcast (i8* (i32)* @f to i8*), i8* null), !dbg !16
+  %1 = call i64 @llvm.coro.size.i64(), !dbg !16
+  %call = call i8* @malloc(i64 %1), !dbg !16
+  %2 = call i8* @llvm.coro.begin(token %0, i8* %call) #7, !dbg !16
+  store i8* %2, i8** %coro_hdl, align 8, !dbg !16
+  %3 = call i8 @llvm.coro.suspend(token none, i1 false), !dbg !17
+  %conv = sext i8 %3 to i32, !dbg !17
+  call void @coro.devirt.trigger(i8* null)
+  switch i32 %conv, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+  ], !dbg !17
+
+sw.bb:                                            ; preds = %entry
+  br label %sw.epilog, !dbg !18
+
+sw.bb1:                                           ; preds = %entry
+  br label %coro_Cleanup, !dbg !18
+
+sw.default:                                       ; preds = %entry
+  br label %coro_Suspend, !dbg !18
+
+sw.epilog:                                        ; preds = %sw.bb
+  %4 = load i32, i32* %x.addr, align 4, !dbg !20
+  %add = add nsw i32 %4, 1, !dbg !21
+  store i32 %add, i32* %x.addr, align 4, !dbg !22
+  br label %coro_Cleanup, !dbg !23
+
+coro_Cleanup:                                     ; preds = %sw.epilog, %sw.bb1
+  %5 = load i8*, i8** %coro_hdl, align 8, !dbg !24
+  %6 = call i8* @llvm.coro.free(token %0, i8* %5), !dbg !24
+  call void @free(i8* %6), !dbg !24
+  br label %coro_Suspend, !dbg !24
+
+coro_Suspend:                                     ; preds = %coro_Cleanup, %sw.default
+  %7 = call i1 @llvm.coro.end(i8* null, i1 false) #7, !dbg !24
+  %8 = load i8*, i8** %coro_hdl, align 8, !dbg !24
+  ret i8* %8, !dbg !24
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: argmemonly nounwind readonly
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #2
+
+declare i8* @malloc(i64) #3
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.coro.size.i64() #4
+
+; Function Attrs: nounwind
+declare i8* @llvm.coro.begin(token, i8* writeonly) #5
+
+; Function Attrs: nounwind
+declare i8 @llvm.coro.suspend(token, i1) #5
+
+declare void @free(i8*) #3
+
+; Function Attrs: argmemonly nounwind readonly
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2
+
+; Function Attrs: nounwind
+declare i1 @llvm.coro.end(i8*, i1) #5
+
+; Function Attrs: alwaysinline
+define private void @coro.devirt.trigger(i8*) #6 {
+entry:
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #2
+
+attributes #0 = { noinline nounwind "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { argmemonly nounwind readonly }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind readnone }
+attributes #5 = { nounwind }
+attributes #6 = { alwaysinline }
+attributes #7 = { noduplicate }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "<stdin>", directory: "C:\5CGitHub\5Cllvm\5Cbuild\5CDebug\5Cbin")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 5.0.0"}
+!6 = distinct !DISubprogram(name: "f", linkageName: "flink", scope: !7, file: !7, line: 55, type: !8, isLocal: false, isDefinition: true, scopeLine: 55, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!7 = !DIFile(filename: "simple-repro.c", directory: "C:\5CGitHub\5Cllvm\5Cbuild\5CDebug\5Cbin")
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !11}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !DILocalVariable(name: "x", arg: 1, scope: !6, file: !7, line: 55, type: !11)
+!13 = !DIExpression()
+!14 = !DILocation(line: 55, column: 13, scope: !6)
+!15 = !DILocalVariable(name: "coro_hdl", scope: !6, file: !7, line: 56, type: !10)
+!16 = !DILocation(line: 56, column: 3, scope: !6)
+!17 = !DILocation(line: 58, column: 5, scope: !6)
+!18 = !DILocation(line: 58, column: 5, scope: !19)
+!19 = distinct !DILexicalBlock(scope: !6, file: !7, line: 58, column: 5)
+!20 = !DILocation(line: 59, column: 9, scope: !6)
+!21 = !DILocation(line: 59, column: 10, scope: !6)
+!22 = !DILocation(line: 59, column: 7, scope: !6)
+!23 = !DILocation(line: 59, column: 5, scope: !6)
+!24 = !DILocation(line: 62, column: 3, scope: !6)
+
+; CHECK: define i8* @f(i32 %x) #0 !dbg ![[ORIG:[0-9]+]]
+; CHECK: define internal fastcc void @f.resume(%f.Frame* %FramePtr) #0 !dbg ![[RESUME:[0-9]+]]
+; CHECK: define internal fastcc void @f.destroy(%f.Frame* %FramePtr) #0 !dbg ![[DESTROY:[0-9]+]]
+; CHECK: define internal fastcc void @f.cleanup(%f.Frame* %FramePtr) #0 !dbg ![[CLEANUP:[0-9]+]]
+
+; CHECK: ![[ORIG]] = distinct !DISubprogram(name: "f", linkageName: "flink"
+; CHECK: !DILocalVariable(name: "x", arg: 1, scope: ![[ORIG]]
+
+; CHECK: ![[RESUME]] = distinct !DISubprogram(name: "f", linkageName: "flink"
+; CHECK: !DILocalVariable(name: "x", arg: 1, scope: ![[RESUME]]
+
+; CHECK: ![[DESTROY]] = distinct !DISubprogram(name: "f", linkageName: "flink"
+
+; CHECK: ![[CLEANUP]] = distinct !DISubprogram(name: "f", linkageName: "flink"

Added: llvm/trunk/test/Transforms/Coroutines/coro-early.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-early.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-early.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-early.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,60 @@
+; Tests that the CoroEarly pass correctly lowers coro.resume, coro.destroy
+; and other intrinsics managed by this pass.
+; RUN: opt < %s -S -coro-early | FileCheck %s
+
+; CHECK: %NoopCoro.Frame = type { void (%NoopCoro.Frame*)*, void (%NoopCoro.Frame*)* }
+; CHECK: @NoopCoro.Frame.Const = private constant %NoopCoro.Frame { void (%NoopCoro.Frame*)* @NoopCoro.ResumeDestroy, void (%NoopCoro.Frame*)* @NoopCoro.ResumeDestroy }
+
+; CHECK-LABEL: @callResume(
+define void @callResume(i8* %hdl) {
+; CHECK-NEXT: entry
+entry:
+; CHECK-NEXT: %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+; CHECK-NEXT: %1 = bitcast i8* %0 to void (i8*)*
+; CHECK-NEXT: call fastcc void %1(i8* %hdl)
+  call void @llvm.coro.resume(i8* %hdl)
+
+; CHECK-NEXT: %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+; CHECK-NEXT: %3 = bitcast i8* %2 to void (i8*)*
+; CHECK-NEXT: call fastcc void %3(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; CHECK-LABEL: @eh(
+define void @eh(i8* %hdl) personality i8* null {
+; CHECK-NEXT: entry
+entry:
+;  CHECK-NEXT: %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+;  CHECK-NEXT: %1 = bitcast i8* %0 to void (i8*)*
+;  CHECK-NEXT: invoke fastcc void %1(i8* %hdl)
+  invoke void @llvm.coro.resume(i8* %hdl)
+          to label %cont unwind label %ehcleanup
+cont:
+  ret void
+
+ehcleanup:
+  %0 = cleanuppad within none []
+  cleanupret from %0 unwind to caller
+}
+
+
+; CHECK-LABEL: @noop(
+define i8* @noop() {
+; CHECK-NEXT: entry
+entry:
+; CHECK-NEXT: ret i8* bitcast (%NoopCoro.Frame* @NoopCoro.Frame.Const to i8*)
+  %n = call i8* @llvm.coro.noop()
+  ret i8* %n
+}
+
+; CHECK-LABEL: define private fastcc void @NoopCoro.ResumeDestroy(%NoopCoro.Frame*) {
+; CHECK-NEXT: entry
+; CHECK-NEXT:    ret void
+
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+declare i8* @llvm.coro.noop()

Added: llvm/trunk/test/Transforms/Coroutines/coro-eh-aware-edge-split.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-eh-aware-edge-split.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-eh-aware-edge-split.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-eh-aware-edge-split.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,218 @@
+; Check that we can handle edge splits leading into a landingpad
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: define internal fastcc void @f.resume(
+define void @f(i1 %cond) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
+  %size = tail call i64 @llvm.coro.size.i64()
+  %alloc = call i8* @malloc(i64 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp, label %coro.ret [
+    i8 0, label %resume
+    i8 1, label %cleanup
+  ]
+
+resume:
+  br i1 %cond, label %invoke1, label %invoke2
+
+invoke1:
+  invoke void @may_throw1()
+          to label %unreach unwind label %pad.with.phi
+invoke2:
+  invoke void @may_throw2()
+          to label %unreach unwind label %pad.with.phi
+
+; Verify that we cloned the landing pad on every edge and inserted a reload of the spilled value
+
+; CHECK: pad.with.phi.from.invoke2:
+; CHECK:   %0 = landingpad { i8*, i32 }
+; CHECK:           catch i8* null
+; CHECK:   br label %pad.with.phi
+
+; CHECK: pad.with.phi.from.invoke1:
+; CHECK:   %1 = landingpad { i8*, i32 }
+; CHECK:           catch i8* null
+; CHECK:   br label %pad.with.phi
+
+; CHECK: pad.with.phi:
+; CHECK:   %val = phi i32 [ 0, %pad.with.phi.from.invoke1 ], [ 1, %pad.with.phi.from.invoke2 ]
+; CHECK:   %lp = phi { i8*, i32 } [ %0, %pad.with.phi.from.invoke2 ], [ %1, %pad.with.phi.from.invoke1 ]
+; CHECK:   %exn = extractvalue { i8*, i32 } %lp, 0
+; CHECK:   call i8* @__cxa_begin_catch(i8* %exn)
+; CHECK:   call void @use_val(i32 %val)
+; CHECK:   call void @__cxa_end_catch()
+; CHECK:   call void @free(i8* %vFrame)
+; CHECK:   ret void
+
+pad.with.phi:
+  %val = phi i32 [ 0, %invoke1 ], [ 1, %invoke2 ]
+  %lp = landingpad { i8*, i32 }
+          catch i8* null
+  %exn = extractvalue { i8*, i32 } %lp, 0
+  call i8* @__cxa_begin_catch(i8* %exn)
+  call void @use_val(i32 %val)
+  call void @__cxa_end_catch()
+  br label %cleanup
+
+cleanup:                                        ; preds = %invoke.cont15, %if.else, %if.then, %ehcleanup21, %init.suspend
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %coro.ret
+
+coro.ret:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+
+unreach:
+  unreachable
+}
+
+; CHECK-LABEL: define internal fastcc void @g.resume(
+define void @g(i1 %cond, i32 %x, i32 %y) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
+  %size = tail call i64 @llvm.coro.size.i64()
+  %alloc = call i8* @malloc(i64 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp, label %coro.ret [
+    i8 0, label %resume
+    i8 1, label %cleanup
+  ]
+
+resume:
+  br i1 %cond, label %invoke1, label %invoke2
+
+invoke1:
+  invoke void @may_throw1()
+          to label %unreach unwind label %pad.with.phi
+invoke2:
+  invoke void @may_throw2()
+          to label %unreach unwind label %pad.with.phi
+
+; Verify that we created cleanuppads on every edge and inserted a reload of the spilled value
+
+; CHECK: pad.with.phi.from.invoke2:
+; CHECK:   %0 = cleanuppad within none []
+; CHECK:   %y.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 6
+; CHECK:   %y.reload = load i32, i32* %y.reload.addr
+; CHECK:   cleanupret from %0 unwind label %pad.with.phi
+
+; CHECK: pad.with.phi.from.invoke1:
+; CHECK:   %1 = cleanuppad within none []
+; CHECK:   %x.reload.addr = getelementptr inbounds %g.Frame, %g.Frame* %FramePtr, i32 0, i32 5
+; CHECK:   %x.reload = load i32, i32* %x.reload.addr
+; CHECK:   cleanupret from %1 unwind label %pad.with.phi
+
+; CHECK: pad.with.phi:
+; CHECK:   %val = phi i32 [ %x.reload, %pad.with.phi.from.invoke1 ], [ %y.reload, %pad.with.phi.from.invoke2 ]
+; CHECK:   %tok = cleanuppad within none []
+; CHECK:   call void @use_val(i32 %val)
+; CHECK:   cleanupret from %tok unwind to caller
+
+pad.with.phi:
+  %val = phi i32 [ %x, %invoke1 ], [ %y, %invoke2 ]
+  %tok = cleanuppad within none []
+  call void @use_val(i32 %val)
+  cleanupret from %tok unwind to caller
+
+cleanup:                                        ; preds = %invoke.cont15, %if.else, %if.then, %ehcleanup21, %init.suspend
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %coro.ret
+
+coro.ret:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+
+unreach:
+  unreachable
+}
+
+; CHECK-LABEL: define internal fastcc void @h.resume(
+define void @h(i1 %cond, i32 %x, i32 %y) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
+  %size = tail call i64 @llvm.coro.size.i64()
+  %alloc = call i8* @malloc(i64 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp, label %coro.ret [
+    i8 0, label %resume
+    i8 1, label %cleanup
+  ]
+
+resume:
+  br i1 %cond, label %invoke1, label %invoke2
+
+invoke1:
+  invoke void @may_throw1()
+          to label %coro.ret unwind label %pad.with.phi
+invoke2:
+  invoke void @may_throw2()
+          to label %coro.ret unwind label %pad.with.phi
+
+; Verify that we created cleanuppads on every edge and inserted a reload of the spilled value
+
+; CHECK: pad.with.phi.from.invoke2:
+; CHECK:   %0 = cleanuppad within none []
+; CHECK:   %y.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 6
+; CHECK:   %y.reload = load i32, i32* %y.reload.addr
+; CHECK:   cleanupret from %0 unwind label %pad.with.phi
+
+; CHECK: pad.with.phi.from.invoke1:
+; CHECK:   %1 = cleanuppad within none []
+; CHECK:   %x.reload.addr = getelementptr inbounds %h.Frame, %h.Frame* %FramePtr, i32 0, i32 5
+; CHECK:   %x.reload = load i32, i32* %x.reload.addr
+; CHECK:   cleanupret from %1 unwind label %pad.with.phi
+
+; CHECK: pad.with.phi:
+; CHECK:   %val = phi i32 [ %x.reload, %pad.with.phi.from.invoke1 ], [ %y.reload, %pad.with.phi.from.invoke2 ]
+; CHECK:   %switch = catchswitch within none [label %catch] unwind to caller
+pad.with.phi:
+  %val = phi i32 [ %x, %invoke1 ], [ %y, %invoke2 ]
+  %switch = catchswitch within none [label %catch] unwind to caller
+
+catch:                                            ; preds = %catch.dispatch
+  %pad = catchpad within %switch [i8* null, i32 64, i8* null]
+  call void @use_val(i32 %val)
+  catchret from %pad to label %coro.ret
+
+cleanup:                                        ; preds = %invoke.cont15, %if.else, %if.then, %ehcleanup21, %init.suspend
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %coro.ret
+
+coro.ret:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
+declare noalias i8* @malloc(i64)
+declare i64 @llvm.coro.size.i64()
+declare i8* @llvm.coro.begin(token, i8* writeonly)
+
+; Function Attrs: nounwind
+declare token @llvm.coro.save(i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+
+; Function Attrs: argmemonly nounwind
+declare void @may_throw1()
+declare void @may_throw2()
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @use_val(i32)
+declare void @__cxa_end_catch()
+
+; Function Attrs: nounwind
+declare i1 @llvm.coro.end(i8*, i1)
+declare void @free(i8*)
+declare i8* @llvm.coro.free(token, i8* nocapture readonly)

Added: llvm/trunk/test/Transforms/Coroutines/coro-elide.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-elide.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-elide.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-elide.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,112 @@
+; Tests that coro.destroy and coro.resume are devirtualized where possible, and
+; that the SCC pipeline restarts and inlines the direct calls.
+; RUN: opt < %s -S -inline -coro-elide -dce | FileCheck %s
+
+declare void @print(i32) nounwind
+
+; resume part of the coroutine
+define fastcc void @f.resume(i8*) {
+  tail call void @print(i32 0)
+  ret void
+}
+
+; destroy part of the coroutine
+define fastcc void @f.destroy(i8*) {
+  tail call void @print(i32 1)
+  ret void
+}
+
+@f.resumers = internal constant [2 x void (i8*)*] [void (i8*)* @f.resume,
+                                                   void (i8*)* @f.destroy]
+
+; a coroutine start function
+define i8* @f() {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null,
+                          i8* bitcast (i8*()* @f to i8*),
+                          i8* bitcast ([2 x void (i8*)*]* @f.resumers to i8*))
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @callResume(
+define void @callResume() {
+entry:
+  %hdl = call i8* @f()
+
+; CHECK: call void @print(i32 0)
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  call fastcc void %1(i8* %hdl)
+
+; CHECK-NEXT: call void @print(i32 1)
+  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %3 = bitcast i8* %2 to void (i8*)*
+  call fastcc void %3(i8* %hdl)
+
+; CHECK-NEXT: ret void
+  ret void
+}
+
+; CHECK-LABEL: @eh(
+define void @eh() personality i8* null {
+entry:
+  %hdl = call i8* @f()
+
+; CHECK: call void @print(i32 0)
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  invoke void %1(i8* %hdl)
+          to label %cont unwind label %ehcleanup
+cont:
+  ret void
+
+ehcleanup:
+  %tok = cleanuppad within none []
+  cleanupret from %tok unwind to caller
+}
+
+; CHECK-LABEL: @no_devirt_info_null(
+; no devirtualization here, since coro.begin info parameter is null
+define void @no_devirt_info_null() {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+
+; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  call fastcc void %1(i8* %hdl)
+
+; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %3 = bitcast i8* %2 to void (i8*)*
+  call fastcc void %3(i8* %hdl)
+
+; CHECK: ret void
+  ret void
+}
+
+; CHECK-LABEL: @no_devirt_no_begin(
+; no devirtualization here, since coro.begin is not visible
+define void @no_devirt_no_begin(i8* %hdl) {
+entry:
+
+; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  call fastcc void %1(i8* %hdl)
+
+; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %3 = bitcast i8* %2 to void (i8*)*
+  call fastcc void %3(i8* %hdl)
+
+; CHECK: ret void
+  ret void
+}
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8* @llvm.coro.frame()
+declare i8* @llvm.coro.subfn.addr(i8*, i8)

Added: llvm/trunk/test/Transforms/Coroutines/coro-frame-unreachable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-frame-unreachable.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-frame-unreachable.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-frame-unreachable.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; Check that coro-split doesn't choke on intrinsics in unreachable blocks
+; RUN: opt < %s -coro-split -S
+
+define i8* @f(i1 %arg) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %arg.addr = alloca i1
+  store i1 %arg, i1* %arg.addr
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br label %cont
+
+cont:
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %cleanup]
+resume:
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+
+no.predecessors:
+  %argval = load i1, i1* %arg.addr
+  call void @print(i1 %argval)
+  br label %suspend
+
+}
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare void @print(i1)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/coro-frame.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-frame.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-frame.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-frame.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,66 @@
+; Check that we can handle spills of the result of the invoke instruction
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define i8* @f(i64 %this) "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %this.addr = alloca i64
+  store i64 %this, i64* %this.addr
+  %this1 = load i64, i64* %this.addr
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %r = invoke double @print(double 0.0) to label %cont unwind label %pad
+
+cont:
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %cleanup]
+resume:
+  call double @print(double %r)
+  call void @print2(i64 %this1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+pad:
+  %tok = cleanuppad within none []
+  cleanupret from %tok unwind to caller
+}
+
+; See if the double was added to the frame
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i64, double }
+
+; See if the double was spilled into the frame
+; CHECK-LABEL: @f(
+; CHECK: %r = call double @print(
+; CHECK: %r.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK: store double %r, double* %r.spill.addr
+; CHECK: ret i8* %hdl
+
+; See if the double was loaded from the frame
+; CHECK-LABEL: @f.resume(
+; CHECK: %r.reload = load double, double* %r.reload.addr
+; CHECK: call double @print(double %r.reload)
+; CHECK: ret void
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare double @print(double)
+declare void @print2(i64)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-heap-elide.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,160 @@
+; Tests that the dynamic allocation and deallocation of the coroutine frame are
+; elided and that any tail calls referencing the coroutine frame have the tail
+; call attribute removed.
+; RUN: opt < %s -S -inline -coro-elide -instsimplify -simplifycfg | FileCheck %s
+
+declare void @print(i32) nounwind
+
+%f.frame = type {i32}
+
+declare void @bar(i8*)
+
+declare fastcc void @f.resume(%f.frame*)
+declare fastcc void @f.destroy(%f.frame*)
+declare fastcc void @f.cleanup(%f.frame*)
+
+declare void @may_throw()
+declare i8* @CustomAlloc(i32)
+declare void @CustomFree(i8*)
+
+@f.resumers = internal constant [3 x void (%f.frame*)*]
+  [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy, void (%f.frame*)* @f.cleanup]
+
+; a coroutine start function
+define i8* @f() personality i8* null {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null,
+                      i8* bitcast (i8*()* @f to i8*),
+                      i8* bitcast ([3 x void (%f.frame*)*]* @f.resumers to i8*))
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %alloc = call i8* @CustomAlloc(i32 4)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %phi)
+  invoke void @may_throw() 
+    to label %ret unwind label %ehcleanup
+ret:          
+  ret i8* %hdl
+
+ehcleanup:
+  %tok = cleanuppad within none []
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  %need.dyn.free = icmp ne i8* %mem, null
+  br i1 %need.dyn.free, label %dyn.free, label %if.end
+dyn.free:
+  call void @CustomFree(i8* %mem)
+  br label %if.end
+if.end:
+  cleanupret from %tok unwind to caller
+}
+
+; CHECK-LABEL: @callResume(
+define void @callResume() {
+entry:
+; CHECK: alloca %f.frame
+; CHECK-NOT: coro.begin
+; CHECK-NOT: CustomAlloc
+; CHECK: call void @may_throw()
+  %hdl = call i8* @f()
+
+; Need to remove 'tail' from the first call to @bar
+; CHECK-NOT: tail call void @bar(
+; CHECK: call void @bar(
+  tail call void @bar(i8* %hdl)
+; CHECK: tail call void @bar(
+  tail call void @bar(i8* null)
+
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  call fastcc void %1(i8* %hdl)
+
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
+  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %3 = bitcast i8* %2 to void (i8*)*
+  call fastcc void %3(i8* %hdl)
+
+; CHECK-NEXT: ret void
+  ret void
+}
+
+; CHECK-LABEL: @callResume_PR34897_no_elision(
+define void @callResume_PR34897_no_elision(i1 %cond) {
+; CHECK-LABEL: entry:
+entry:
+; CHECK: call i8* @CustomAlloc(
+  %hdl = call i8* @f()
+; CHECK: tail call void @bar(
+  tail call void @bar(i8* %hdl)
+; CHECK: tail call void @bar(
+  tail call void @bar(i8* null)
+  br i1 %cond, label %if.then, label %if.else
+
+; CHECK-LABEL: if.then:
+if.then:
+; CHECK: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8*
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  call fastcc void %1(i8* %hdl)
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8*
+  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %3 = bitcast i8* %2 to void (i8*)*
+  call fastcc void %3(i8* %hdl)
+  br label %return
+
+if.else:
+  br label %return
+
+; CHECK-LABEL: return:
+return:
+; CHECK: ret void
+  ret void
+}
+
+; a coroutine start function (cannot elide heap alloc, due to second argument to
+; coro.begin not pointing to coro.alloc)
+define i8* @f_no_elision() personality i8* null {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null,
+                      i8* bitcast (i8*()* @f_no_elision to i8*),
+                      i8* bitcast ([3 x void (%f.frame*)*]* @f.resumers to i8*))
+  %alloc = call i8* @CustomAlloc(i32 4)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @callResume_no_elision(
+define void @callResume_no_elision() {
+entry:
+; CHECK: call i8* @CustomAlloc(
+  %hdl = call i8* @f_no_elision()
+
+; Tail calls should remain tail calls
+; CHECK: tail call void @bar(
+  tail call void @bar(i8* %hdl)
+; CHECK: tail call void @bar(  
+  tail call void @bar(i8* null)
+
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8*
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  call fastcc void %1(i8* %hdl)
+
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8*
+  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %3 = bitcast i8* %2 to void (i8*)*
+  call fastcc void %3(i8* %hdl)
+
+; CHECK-NEXT: ret void
+  ret void
+}
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.free(token, i8*)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8* @llvm.coro.frame(token)
+declare i8* @llvm.coro.subfn.addr(i8*, i8)

Added: llvm/trunk/test/Transforms/Coroutines/coro-materialize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-materialize.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-materialize.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-materialize.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; Verifies that we materialize instructions across suspend points
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define i8* @f(i32 %n) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %inc1 = add i32 %n, 1
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume1
+                                  i8 1, label %cleanup]
+resume1:
+  %inc2 = add i32 %inc1, 1
+  %sp2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume2
+                                  i8 1, label %cleanup]
+
+resume2:
+  call void @print(i32 %inc1)
+  call void @print(i32 %inc2)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; See that we only spilled one value
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32 }
+; CHECK-LABEL: @f(
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)
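
The frame type in the CHECK line above only has room for a single i32, which is the
point of the test: %inc1 is live across both suspends and gets spilled, while
%inc2 = add i32 %inc1, 1 can simply be recomputed after the reload. A rough sketch
of the interesting part of the resume path (the .reload names are assumed; the test
does not check them):

  %inc1.reload = load i32, i32* %inc1.reload.addr
  %inc2 = add i32 %inc1.reload, 1            ; rematerialized, not reloaded from the frame
  call void @print(i32 %inc1.reload)
  call void @print(i32 %inc2)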

Added: llvm/trunk/test/Transforms/Coroutines/coro-padding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-padding.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-padding.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-padding.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; Check that we will insert the correct padding if the natural alignment of the
+; spilled data does not match the alignment specified in the alloca instruction.
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+%PackedStruct = type <{ i64 }>
+
+declare void @consume(%PackedStruct*)
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %data = alloca %PackedStruct, align 8
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  call void @consume(%PackedStruct* %data)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %cleanup]
+resume:
+  call void @consume(%PackedStruct* %data)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; See if the padding was inserted before PackedStruct
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, [6 x i8], %PackedStruct }
+
+; See if we used correct index to access packed struct (padding is field 4)
+; CHECK-LABEL: @f(
+; CHECK:       %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK-NEXT:  call void @consume(%PackedStruct* %[[DATA]])
+; CHECK: ret i8*
+
+; See if we used correct index to access packed struct (padding is field 4)
+; CHECK-LABEL: @f.resume(
+; CHECK:       %[[DATA:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK-NEXT:  call void @consume(%PackedStruct* %[[DATA]])
+; CHECK: ret void
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare double @print(double)
+declare void @free(i8*)
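
The [6 x i8] in the expected frame follows directly from the layout, assuming the
default 64-bit pointer size (the test carries no explicit datalayout):

  offset  0: void (%f.Frame*)*  resume pointer   (8 bytes)
  offset  8: void (%f.Frame*)*  destroy pointer  (8 bytes)
  offset 16: i1, i1             two flag fields  (2 bytes)
  offset 18: [6 x i8]           padding          (6 bytes)
  offset 24: %PackedStruct      first offset past the flags honoring the "align 8"
                                requested by the alloca

Without the padding the packed struct would land at offset 18 and silently lose the
alignment the frontend asked for.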

Added: llvm/trunk/test/Transforms/Coroutines/coro-spill-after-phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-spill-after-phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-spill-after-phi.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-spill-after-phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,60 @@
+; Verifies that we insert spills of a PHI instruction after all PHI nodes
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define i8* @f(i1 %n) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br i1 %n, label %begin, label %alt
+alt:
+  br label %begin
+
+begin:
+  %phi1 = phi i32 [ 0, %entry ], [ 2, %alt ]
+  %phi2 = phi i32 [ 1, %entry ], [ 3, %alt ]
+
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  call i32 @print(i32 %phi1)
+  call i32 @print(i32 %phi2)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; Verifies that both phis are stored correctly in the coroutine frame
+; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i32, i32 }
+; CHECK-LABEL: @f(
+; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr
+; CHECK: %phi1 = select i1 %n, i32 0, i32 2
+; CHECK: %phi2 = select i1 %n, i32 1, i32 3
+; CHECK: %phi2.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5
+; CHECK: store i32 %phi2, i32* %phi2.spill.addr
+; CHECK: %phi1.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK: store i32 %phi1, i32* %phi1.spill.addr
+; CHECK: ret i8* %hdl
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare i32 @print(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/coro-spill-corobegin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-spill-corobegin.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-spill-corobegin.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-spill-corobegin.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,69 @@
+; Check that we can spill coro.begin from an inlined inner coroutine.
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+%g.Frame = type { void (%g.Frame*)*, void (%g.Frame*)*, i32, i1, i32 }
+
+@g.resumers = private constant [3 x void (%g.Frame*)*] [void (%g.Frame*)* @g.dummy, void (%g.Frame*)* @g.dummy, void (%g.Frame*)* @g.dummy]
+
+declare void @g.dummy(%g.Frame*)
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %innerid = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%g.Frame*)*]* @g.resumers to i8*))
+  %innerhdl = call noalias nonnull i8* @llvm.coro.begin(token %innerid, i8* null)
+  %gframe = bitcast i8* %innerhdl to %g.Frame*
+
+  %tok = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %tok, label %suspend [i8 0, label %resume
+                                i8 1, label %cleanup]
+resume:
+  %gvar.addr = getelementptr inbounds %g.Frame, %g.Frame* %gframe, i32 0, i32 4
+  %gvar = load i32, i32* %gvar.addr
+  call void @print.i32(i32 %gvar)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; See if the i8* for coro.begin was added to f.Frame
+; CHECK-LABEL: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, i1, i8* }
+
+; See if g's coro.begin was spilled into the frame
+; CHECK-LABEL: @f(
+; CHECK: %innerid = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* bitcast ([3 x void (%g.Frame*)*]* @g.resumers to i8*))
+; CHECK: %innerhdl = call noalias nonnull i8* @llvm.coro.begin(token %innerid, i8* null)
+; CHECK: %[[spilladdr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4
+; CHECK: store i8* %innerhdl, i8** %[[spilladdr]]
+
+; See if the coro.begin was loaded from the frame
+; CHECK-LABEL: @f.resume(
+; CHECK: %[[innerhdlAddr:.+]] = getelementptr inbounds %f.Frame, %f.Frame* %{{.+}}, i32 0, i32 4
+; CHECK: %[[innerhdl:.+]] = load i8*, i8** %[[innerhdlAddr]]
+; CHECK: %[[gframe:.+]] = bitcast i8* %[[innerhdl]] to %g.Frame*
+; CHECK: %[[gvarAddr:.+]] = getelementptr inbounds %g.Frame, %g.Frame* %[[gframe]], i32 0, i32 4
+; CHECK: %[[gvar:.+]] = load i32, i32* %[[gvarAddr]]
+; CHECK: call void @print.i32(i32 %[[gvar]])
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare void @print.i32(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/coro-split-00.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-split-00.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-split-00.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-split-00.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,79 @@
+; Tests that the coro-split pass splits the coroutine into f, f.resume and f.destroy
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.alloc, label %dyn.alloc, label %begin
+
+dyn.alloc:  
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %begin
+
+begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %phi)
+  call void @print(i32 0)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume 
+                                i8 1, label %cleanup]
+resume:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)  
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @f(
+; CHECK: call i8* @malloc
+; CHECK: @llvm.coro.begin(token %id, i8* %phi)
+; CHECK: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** %resume.addr
+; CHECK: %[[SEL:.+]] = select i1 %need.alloc, void (%f.Frame*)* @f.destroy, void (%f.Frame*)* @f.cleanup
+; CHECK: store void (%f.Frame*)* %[[SEL]], void (%f.Frame*)** %destroy.addr
+; CHECK: call void @print(i32 0)
+; CHECK-NOT: call void @print(i32 1)
+; CHECK-NOT: call void @free(
+; CHECK: ret i8* %hdl
+
+; CHECK-LABEL: @f.resume(
+; CHECK-NOT: call i8* @malloc
+; CHECK-NOT: call void @print(i32 0)
+; CHECK: call void @print(i32 1)
+; CHECK-NOT: call void @print(i32 0)
+; CHECK: call void @free(
+; CHECK: ret void
+
+; CHECK-LABEL: @f.destroy(
+; CHECK-NOT: call i8* @malloc
+; CHECK-NOT: call void @print(
+; CHECK: call void @free(
+; CHECK: ret void
+
+; CHECK-LABEL: @f.cleanup(
+; CHECK-NOT: call i8* @malloc
+; CHECK-NOT: call void @print(
+; CHECK-NOT: call void @free(
+; CHECK: ret void
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1) 
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/coro-split-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-split-01.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-split-01.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-split-01.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; Tests that a coroutine is split, inlined into the caller and devirtualized.
+; RUN: opt < %s -S -enable-coroutines -O2 | FileCheck %s
+
+define i8* @f() {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %phi)
+  call void @print(i32 0)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume 
+                                i8 1, label %cleanup]
+resume:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)  
+  ret i8* %hdl
+}
+define i32 @main() {
+entry:
+  %hdl = call i8* @f()
+  call void @llvm.coro.resume(i8* %hdl)
+  ret i32 0
+; CHECK-LABEL: @main(
+; CHECK: call void @print(i32 0)
+; CHECK: call void @print(i32 1)
+; CHECK:      ret i32 0
+}
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+  
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1) 
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/coro-split-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-split-02.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-split-02.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-split-02.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,57 @@
+; Tests that coro-split can handle the case when code after coro.suspend uses
+; a value produced between coro.save and coro.suspend (%Result.i19)
+; and checks whether stray coro.saves are properly removed
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+%"struct.std::coroutine_handle" = type { i8* }
+%"struct.std::coroutine_handle.0" = type { %"struct.std::coroutine_handle" }
+%"struct.lean_future<int>::Awaiter" = type { i32, %"struct.std::coroutine_handle.0" }
+
+declare i8* @malloc(i64)
+declare void @print(i32)
+
+define void @a() "coroutine.presplit"="1" {
+entry:
+  %ref.tmp7 = alloca %"struct.lean_future<int>::Awaiter", align 8
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %alloc = call i8* @malloc(i64 16) #3
+  %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %save = call token @llvm.coro.save(i8* null)
+  %Result.i19 = getelementptr inbounds %"struct.lean_future<int>::Awaiter", %"struct.lean_future<int>::Awaiter"* %ref.tmp7, i64 0, i32 0
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %StrayCoroSave = call token @llvm.coro.save(i8* null)
+  %val = load i32, i32* %Result.i19
+  call void @print(i32 %val)
+  br label %exit
+exit:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: @a.resume(
+; CHECK:         getelementptr inbounds %a.Frame
+; CHECK-NEXT:    getelementptr inbounds %"struct.lean_future<int>::Awaiter"
+; CHECK-NOT:     call token @llvm.coro.save(i8* null)
+; CHECK-NEXT:    %val = load i32, i32* %Result
+; CHECK-NEXT:    call void @print(i32 %val)
+; CHECK-NEXT:    ret void
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
+declare i1 @llvm.coro.alloc(token) #3
+declare noalias nonnull i8* @"\01??2 at YAPEAX_K@Z"(i64) local_unnamed_addr
+declare i64 @llvm.coro.size.i64() #5
+declare i8* @llvm.coro.begin(token, i8* writeonly) #3
+declare void @"\01?puts@@YAXZZ"(...)
+declare token @llvm.coro.save(i8*) #3
+declare i8* @llvm.coro.frame() #5
+declare i8 @llvm.coro.suspend(token, i1) #3
+declare void @"\01??3 at YAXPEAX@Z"(i8*) local_unnamed_addr #10
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2
+declare i1 @llvm.coro.end(i8*, i1) #3
+

Added: llvm/trunk/test/Transforms/Coroutines/coro-split-alloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-split-alloc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-split-alloc.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-split-alloc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,64 @@
+; Tests that coro-split passes initialized values to the coroutine frame allocator.
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define i8* @f(i32 %argument) "coroutine.presplit"="1" {
+entry:
+  %argument.addr = alloca i32, align 4
+  %incremented = add i32 %argument, 1
+  store i32 %incremented, i32* %argument.addr, align 4
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.alloc, label %dyn.alloc, label %begin
+
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %allocator_argument = load i32, i32* %argument.addr, align 4
+  %alloc = call i8* @custom_alloctor(i32 %size, i32 %allocator_argument)
+  br label %begin
+
+begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %phi)
+  %print_argument = load i32, i32* %argument.addr, align 4
+  call void @print(i32 %print_argument)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %cleanup]
+resume:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @f(
+; CHECK: %argument.addr = alloca i32
+; CHECK: %incremented = add i32 %argument, 1
+; CHECK-NEXT: store i32 %incremented, i32* %argument.addr
+; CHECK-LABEL: dyn.alloc:
+; CHECK: %allocator_argument = load i32, i32* %argument.addr
+; CHECK: %alloc = call i8* @custom_alloctor(i32 24, i32 %allocator_argument)
+; CHECK-LABEL: begin:
+; CHECK: %print_argument = load i32, i32* %argument.addr
+; CHECK: call void @print(i32 %print_argument)
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @custom_alloctor(i32, i32)
+declare void @print(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/coro-split-dbg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-split-dbg.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-split-dbg.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-split-dbg.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,119 @@
+; Make sure that coro-split correctly deals with debug information.
+; The test here simply checks that it does not produce bad IR that crashes opt.
+; RUN: opt < %s -coro-split -disable-output
+source_filename = "coro.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @bar(...) local_unnamed_addr #2
+
+; Function Attrs: nounwind uwtable
+define i8* @f() #3 !dbg !16 {
+entry:
+  %0 = tail call token @llvm.coro.id(i32 0, i8* null, i8* bitcast (i8* ()* @f to i8*), i8* null), !dbg !26
+  %1 = tail call i64 @llvm.coro.size.i64(), !dbg !26
+  %call = tail call i8* @malloc(i64 %1), !dbg !26
+  %2 = tail call i8* @llvm.coro.begin(token %0, i8* %call) #9, !dbg !26
+  tail call void @llvm.dbg.value(metadata i8* %2, metadata !21, metadata !12), !dbg !26
+  br label %for.cond, !dbg !27
+
+for.cond:                                         ; preds = %for.cond, %entry
+  tail call void @llvm.dbg.value(metadata i32 undef, metadata !22, metadata !12), !dbg !28
+  tail call void @llvm.dbg.value(metadata i32 undef, metadata !11, metadata !12) #7, !dbg !29
+  tail call void (...) @bar() #7, !dbg !33
+  %3 = tail call token @llvm.coro.save(i8* null), !dbg !34
+  %4 = tail call i8 @llvm.coro.suspend(token %3, i1 false), !dbg !34
+  %conv = sext i8 %4 to i32, !dbg !34
+  switch i32 %conv, label %coro_Suspend [
+    i32 0, label %for.cond
+    i32 1, label %coro_Cleanup
+  ], !dbg !34
+
+coro_Cleanup:                                     ; preds = %for.cond
+  %5 = tail call i8* @llvm.coro.free(token %0, i8* %2), !dbg !35
+  tail call void @free(i8* nonnull %5), !dbg !36
+  br label %coro_Suspend, !dbg !36
+
+coro_Suspend:                                     ; preds = %for.cond, %if.then, %coro_Cleanup
+  tail call i1 @llvm.coro.end(i8* null, i1 false) #9, !dbg !38
+  ret i8* %2, !dbg !39
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #4
+
+; Function Attrs: argmemonly nounwind readonly
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #5
+
+; Function Attrs: nounwind
+declare noalias i8* @malloc(i64) local_unnamed_addr #6
+declare i64 @llvm.coro.size.i64() #1
+declare i8* @llvm.coro.begin(token, i8* writeonly) #7
+declare token @llvm.coro.save(i8*) #7
+declare i8 @llvm.coro.suspend(token, i1) #7
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #4
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #5
+declare void @free(i8* nocapture) local_unnamed_addr #6
+declare i1 @llvm.coro.end(i8*, i1) #7
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #5
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind uwtable "coroutine.presplit"="1" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { argmemonly nounwind }
+attributes #5 = { argmemonly nounwind readonly }
+attributes #6 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #7 = { nounwind }
+attributes #8 = { alwaysinline nounwind }
+attributes #9 = { noduplicate }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "coro.c", directory: "/home/gor/build/bin")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 4.0.0"}
+!6 = distinct !DISubprogram(name: "print", scope: !1, file: !1, line: 6, type: !7, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !10)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null, !9}
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !{!11}
+!11 = !DILocalVariable(name: "v", arg: 1, scope: !6, file: !1, line: 6, type: !9)
+!12 = !DIExpression()
+!13 = !DILocation(line: 6, column: 16, scope: !6)
+!14 = !DILocation(line: 6, column: 19, scope: !6)
+!15 = !DILocation(line: 6, column: 25, scope: !6)
+!16 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 8, type: !17, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, unit: !0, retainedNodes: !20)
+!17 = !DISubroutineType(types: !18)
+!18 = !{!19}
+!19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64, align: 64)
+!20 = !{!21, !22, !24}
+!21 = !DILocalVariable(name: "coro_hdl", scope: !16, file: !1, line: 9, type: !19)
+!22 = !DILocalVariable(name: "i", scope: !23, file: !1, line: 11, type: !9)
+!23 = distinct !DILexicalBlock(scope: !16, file: !1, line: 11, column: 3)
+!24 = !DILocalVariable(name: "coro_mem", scope: !25, file: !1, line: 16, type: !19)
+!25 = distinct !DILexicalBlock(scope: !16, file: !1, line: 16, column: 3)
+!26 = !DILocation(line: 9, column: 3, scope: !16)
+!27 = !DILocation(line: 11, column: 8, scope: !23)
+!28 = !DILocation(line: 11, column: 12, scope: !23)
+!29 = !DILocation(line: 6, column: 16, scope: !6, inlinedAt: !30)
+!30 = distinct !DILocation(line: 12, column: 5, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !32, file: !1, line: 11, column: 25)
+!32 = distinct !DILexicalBlock(scope: !23, file: !1, line: 11, column: 3)
+!33 = !DILocation(line: 6, column: 19, scope: !6, inlinedAt: !30)
+!34 = !DILocation(line: 13, column: 5, scope: !31)
+!35 = !DILocation(line: 16, column: 3, scope: !25)
+!36 = !DILocation(line: 16, column: 3, scope: !37)
+!37 = distinct !DILexicalBlock(scope: !25, file: !1, line: 16, column: 3)
+!38 = !DILocation(line: 16, column: 3, scope: !16)
+!39 = !DILocation(line: 17, column: 1, scope: !16)

Added: llvm/trunk/test/Transforms/Coroutines/coro-split-eh.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-split-eh.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-split-eh.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-split-eh.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,145 @@
+; Tests that coro-split removes cleanup code after coro.end in resume functions
+; and retains it in the start function.
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define i8* @f(i1 %val) "coroutine.presplit"="1" personality i32 3 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+  call void @print(i32 0)
+  br i1 %val, label %resume, label %susp
+
+susp:  
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume 
+                                i8 1, label %suspend]
+resume:
+  invoke void @print(i32 1) to label %suspend unwind label %lpad
+
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)  
+  call void @print(i32 0) ; should not be present in f.resume
+  ret i8* %hdl
+
+lpad:
+  %lpval = landingpad { i8*, i32 }
+     cleanup
+
+  call void @print(i32 2)
+  %need.resume = call i1 @llvm.coro.end(i8* null, i1 true)
+  br i1 %need.resume, label %eh.resume, label %cleanup.cont
+
+cleanup.cont:
+  call void @print(i32 3) ; should not be present in f.resume
+  br label %eh.resume
+
+eh.resume:
+  resume { i8*, i32 } %lpval
+}
+
+; Verify that the start function contains both print calls: the one before and the one after coro.end
+; CHECK-LABEL: define i8* @f(
+; CHECK: invoke void @print(i32 1)
+; CHECK:   to label %AfterCoroEnd unwind label %lpad
+
+; CHECK: AfterCoroEnd:
+; CHECK:   call void @print(i32 0)
+; CHECK:   ret i8* %hdl
+
+; CHECK:         lpad:
+; CHECK-NEXT:      %lpval = landingpad { i8*, i32 }
+; CHECK-NEXT:         cleanup
+; CHECK-NEXT:      call void @print(i32 2)
+; CHECK-NEXT:      call void @print(i32 3)
+; CHECK-NEXT:      resume { i8*, i32 } %lpval
+
+define i8* @f2(i1 %val) "coroutine.presplit"="1" personality i32 4 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* null)
+  call void @print(i32 0)
+  br i1 %val, label %resume, label %susp
+
+susp:  
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume 
+                                i8 1, label %suspend]
+resume:
+  invoke void @print(i32 1) to label %suspend unwind label %lpad
+
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)  
+  call void @print(i32 0) ; should not be present in f.resume
+  ret i8* %hdl
+
+lpad:
+  %tok = cleanuppad within none []
+  call void @print(i32 2)
+  %unused = call i1 @llvm.coro.end(i8* null, i1 true) [ "funclet"(token %tok) ]
+  cleanupret from %tok unwind label %cleanup.cont
+
+cleanup.cont:
+  %tok2 = cleanuppad within none []
+  call void @print(i32 3) ; should not be present in f.resume
+  cleanupret from %tok2 unwind to caller 
+}
+
+; Verify that the start function contains both print calls: the one before and the one after coro.end
+; CHECK-LABEL: define i8* @f2(
+; CHECK: invoke void @print(i32 1)
+; CHECK:   to label %AfterCoroEnd unwind label %lpad
+
+; CHECK: AfterCoroEnd:
+; CHECK:   call void @print(i32 0)
+; CHECK:   ret i8* %hdl
+
+; CHECK:      lpad:
+; CHECK-NEXT:   %tok = cleanuppad within none []
+; CHECK-NEXT:   call void @print(i32 2)
+; CHECK-NEXT:   call void @print(i32 3)
+; CHECK-NEXT:   cleanupret from %tok unwind to caller
+
+; VERIFY Resume Parts
+
+; Verify that the resume function does not contain the print calls that appear after coro.end
+; CHECK-LABEL: define internal fastcc void @f.resume
+; CHECK: invoke void @print(i32 1)
+; CHECK:   to label %CoroEnd unwind label %lpad
+
+; CHECK:      CoroEnd:
+; CHECK-NEXT:   ret void
+
+; CHECK:         lpad:
+; CHECK-NEXT:      %lpval = landingpad { i8*, i32 }
+; CHECK-NEXT:         cleanup
+; CHECK-NEXT:      call void @print(i32 2)
+; CHECK-NEXT:      resume { i8*, i32 } %lpval
+
+; Verify that the resume function does not contain the print calls that appear after coro.end
+; CHECK-LABEL: define internal fastcc void @f2.resume
+; CHECK: invoke void @print(i32 1)
+; CHECK:   to label %CoroEnd unwind label %lpad
+
+; CHECK:      CoroEnd:
+; CHECK-NEXT:   ret void
+
+; CHECK:      lpad:
+; CHECK-NEXT:   %tok = cleanuppad within none []
+; CHECK-NEXT:   call void @print(i32 2)
+; CHECK-NEXT:   cleanupret from %tok unwind to caller
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i8* @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1) 
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)
+

Added: llvm/trunk/test/Transforms/Coroutines/coro-split-hidden.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-split-hidden.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-split-hidden.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-split-hidden.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,81 @@
+; Tests that coro-split can convert functions with hidden visibility.
+; These may be generated by a frontend such as Clang, when inlining with
+; '-fvisibility-inlines-hidden'.
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define hidden i8* @f() "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.alloc, label %dyn.alloc, label %begin
+
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %begin
+
+begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %phi)
+  call void @print(i32 0)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %cleanup]
+resume:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: hidden{{.*}}@f(
+; CHECK: call i8* @malloc
+; CHECK: @llvm.coro.begin(token %id, i8* %phi)
+; CHECK: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** %resume.addr
+; CHECK: %[[SEL:.+]] = select i1 %need.alloc, void (%f.Frame*)* @f.destroy, void (%f.Frame*)* @f.cleanup
+; CHECK: store void (%f.Frame*)* %[[SEL]], void (%f.Frame*)** %destroy.addr
+; CHECK: call void @print(i32 0)
+; CHECK-NOT: call void @print(i32 1)
+; CHECK-NOT: call void @free(
+; CHECK: ret i8* %hdl
+
+; CHECK-LABEL: internal{{.*}}@f.resume(
+; CHECK-NOT: call i8* @malloc
+; CHECK-NOT: call void @print(i32 0)
+; CHECK: call void @print(i32 1)
+; CHECK-NOT: call void @print(i32 0)
+; CHECK: call void @free(
+; CHECK: ret void
+
+; CHECK-LABEL: internal{{.*}}@f.destroy(
+; CHECK-NOT: call i8* @malloc
+; CHECK-NOT: call void @print(
+; CHECK: call void @free(
+; CHECK: ret void
+
+; CHECK-LABEL: internal{{.*}}@f.cleanup(
+; CHECK-NOT: call i8* @malloc
+; CHECK-NOT: call void @print(
+; CHECK-NOT: call void @free(
+; CHECK: ret void
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/coro-split-musttail.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/coro-split-musttail.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/coro-split-musttail.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/coro-split-musttail.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,60 @@
+; Tests that coro-split will convert coro.resume followed by a suspend to a
+; musttail call.
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+define void @f() "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %alloc = call i8* @malloc(i64 16) #3
+  %vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
+
+  %save = call token @llvm.coro.save(i8* null)
+  %addr1 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv1 = bitcast i8* %addr1 to void (i8*)*
+  call fastcc void %pv1(i8* null)
+
+  %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %suspend, label %exit [
+    i8 0, label %await.ready
+    i8 1, label %exit
+  ]
+await.ready:
+  %save2 = call token @llvm.coro.save(i8* null)
+  %addr2 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+  %pv2 = bitcast i8* %addr2 to void (i8*)*
+  call fastcc void %pv2(i8* null)
+
+  %suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %exit [
+    i8 0, label %exit
+    i8 1, label %exit
+  ]
+exit:
+  call i1 @llvm.coro.end(i8* null, i1 false)
+  ret void
+}
+
+; Verify that in the initial function, the resume call is not marked with musttail.
+; CHECK-LABEL: @f(
+; CHECK: %[[addr1:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+; CHECK-NEXT: %[[pv1:.+]] = bitcast i8* %[[addr1]] to void (i8*)*
+; CHECK-NOT: musttail call fastcc void %[[pv1]](i8* null)
+
+; Verify that in the resume part, the resume call is marked with musttail.
+; CHECK-LABEL: @f.resume(
+; CHECK: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
+; CHECK-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
+; CHECK-NEXT: musttail call fastcc void %[[pv2]](i8* null)
+; CHECK-NEXT: ret void
+
+declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
+declare i1 @llvm.coro.alloc(token) #3
+declare i64 @llvm.coro.size.i64() #5
+declare i8* @llvm.coro.begin(token, i8* writeonly) #3
+declare token @llvm.coro.save(i8*) #3
+declare i8* @llvm.coro.frame() #5
+declare i8 @llvm.coro.suspend(token, i1) #3
+declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2
+declare i1 @llvm.coro.end(i8*, i1) #3
+declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #5
+declare i8* @malloc(i64)

Added: llvm/trunk/test/Transforms/Coroutines/ex0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/ex0.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/ex0.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/ex0.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,59 @@
+; First example from Doc/Coroutines.rst (two-block loop)
+; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br label %loop
+
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ]
+  call void @print(i32 %n.val)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume 
+                                i8 1, label %cleanup]
+resume:
+  %inc = add i32 %n.val, 1
+  br label %loop
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)  
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @main(
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+; CHECK: entry:
+; CHECK:      call void @print(i32 4)
+; CHECK:      call void @print(i32 5)
+; CHECK:      call void @print(i32 6)
+; CHECK:      ret i32 0
+}
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i8* @llvm.coro.alloc(token)
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+  
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1) 
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/ex1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/ex1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/ex1.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/ex1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,54 @@
+; First example from Doc/Coroutines.rst (one-block loop)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br label %loop
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %loop ]
+  %inc = add nsw i32 %n.val, 1
+  call void @print(i32 %n.val)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %loop
+                                i8 1, label %cleanup]
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @main(
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+; CHECK-NEXT: entry:
+; CHECK:      call void @print(i32 4)
+; CHECK:      call void @print(i32 5)
+; CHECK:      call void @print(i32 6)
+; CHECK:      ret i32 0
+}
+
+declare i8* @malloc(i32)
+declare void @free(i8*)
+declare void @print(i32)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/ex2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/ex2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/ex2.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/ex2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,63 @@
+; Second example from Doc/Coroutines.rst (custom alloc and free functions)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @CustomAlloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %loop
+loop:
+  %n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ]
+  %inc = add nsw i32 %n.val, 1
+  call void @print(i32 %n.val)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %loop
+                                i8 1, label %cleanup]
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  %need.dyn.free = icmp ne i8* %mem, null
+  br i1 %need.dyn.free, label %dyn.free, label %suspend
+dyn.free:
+  call void @CustomFree(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @main
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+; CHECK:      call void @print(i32 4)
+; CHECK-NEXT: call void @print(i32 5)
+; CHECK-NEXT: call void @print(i32 6)
+; CHECK-NEXT: ret i32 0
+}
+
+declare i8* @CustomAlloc(i32)
+declare void @CustomFree(i8*)
+declare void @print(i32)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/ex3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/ex3.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/ex3.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/ex3.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,60 @@
+; Third example from Doc/Coroutines.rst (two suspend points)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br label %loop
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %loop.resume ]
+  call void @print(i32 %n.val) #4
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %loop.resume
+                                i8 1, label %cleanup]
+loop.resume:
+  %inc = add nsw i32 %n.val, 1
+  %sub = xor i32 %n.val, -1
+  call void @print(i32 %sub)
+  %1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %1, label %suspend [i8 0, label %loop
+                                i8 1, label %cleanup]
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @main
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+; CHECK:      call void @print(i32 4)
+; CHECK-NEXT: call void @print(i32 -5)
+; CHECK-NEXT: call void @print(i32 5)
+; CHECK:      ret i32 0
+}
+
+declare i8* @malloc(i32)
+declare void @free(i8*)
+declare void @print(i32)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/ex4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/ex4.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/ex4.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/ex4.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,71 @@
+; Fourth example from Doc/Coroutines.rst (coroutine promise)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+  %promise = alloca i32
+  %pv = bitcast i32* %promise to i8*
+  %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %loop
+loop:
+  %n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ]
+  %inc = add nsw i32 %n.val, 1
+  store i32 %n.val, i32* %promise
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %loop
+                                i8 1, label %cleanup]
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @main
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  %promise.addr.raw = call i8* @llvm.coro.promise(i8* %hdl, i32 4, i1 false)
+  %promise.addr = bitcast i8* %promise.addr.raw to i32*
+  %val0 = load i32, i32* %promise.addr
+  call void @print(i32 %val0)
+  call void @llvm.coro.resume(i8* %hdl)
+  %val1 = load i32, i32* %promise.addr
+  call void @print(i32 %val1)
+  call void @llvm.coro.resume(i8* %hdl)
+  %val2 = load i32, i32* %promise.addr
+  call void @print(i32 %val2)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+; CHECK:      call void @print(i32 4)
+; CHECK-NEXT: call void @print(i32 5)
+; CHECK-NEXT: call void @print(i32 6)
+; CHECK:      ret i32 0
+}
+
+declare i8* @llvm.coro.promise(i8*, i32, i1)
+declare i8* @malloc(i32)
+declare void @free(i8*)
+declare void @print(i32)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/ex5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/ex5.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/ex5.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/ex5.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,73 @@
+; Fifth example from Doc/Coroutines.rst (final suspend)
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br label %while.cond
+while.cond:
+  %n.val = phi i32 [ %n, %entry ], [ %dec, %while.body ]
+  %cmp = icmp sgt i32 %n.val, 0
+  br i1 %cmp, label %while.body, label %while.end
+
+while.body:
+  %dec = add nsw i32 %n.val, -1
+  call void @print(i32 %n.val) #4
+  %s = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %s, label %suspend [i8 0, label %while.cond
+                                i8 1, label %cleanup]
+while.end:
+  %s.final = call i8 @llvm.coro.suspend(token none, i1 true)
+  switch i8 %s.final, label %suspend [i8 0, label %trap
+                                      i8 1, label %cleanup]
+trap: 
+  call void @llvm.trap()
+  unreachable
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret i8* %hdl
+}
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @llvm.trap()
+declare void @free(i8* nocapture)
+
+declare token @llvm.coro.id( i32, i8*, i8*, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare token @llvm.coro.save(i8*)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+; CHECK-LABEL: @main
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  br label %while
+while:
+  call void @llvm.coro.resume(i8* %hdl)
+  %done = call i1 @llvm.coro.done(i8* %hdl)
+  br i1 %done, label %end, label %while
+end:
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+
+; CHECK:      call void @print(i32 4)
+; CHECK:      call void @print(i32 3)
+; CHECK:      call void @print(i32 2)
+; CHECK:      call void @print(i32 1)
+; CHECK:      ret i32 0
+}
+
+declare i1 @llvm.coro.done(i8*)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/no-suspend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/no-suspend.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/no-suspend.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/no-suspend.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,380 @@
+; Test no-suspend coroutines
+; RUN: opt < %s -coro-split -S | FileCheck %s
+
+; A coroutine with no suspends will turn into:
+;
+; CHECK-LABEL: define void @no_suspends(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    alloca
+; CHECK-NEXT:    bitcast
+; CHECK-NEXT:    call void @print(i32 %n)
+; CHECK-NEXT:    ret void
+;
+define void @no_suspends(i32 %n) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  call void @print(i32 %n)
+  br label %cleanup
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  %need.dyn.free = icmp ne i8* %mem, null
+  br i1 %need.dyn.free, label %dyn.free, label %suspend
+dyn.free:
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+}
+
+; SimplifySuspendPoint will detect that coro.resume resumes itself and will
+; replace the suspend with a jump to the %resume label, turning it into a
+; no-suspend coroutine.
+;
+; CHECK-LABEL: define void @simplify_resume(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    alloca
+; CHECK-NEXT:    bitcast
+; CHECK-NEXT:    call void @llvm.memcpy
+; CHECK-NEXT:    call void @print(i32 0)
+; CHECK-NEXT:    ret void
+;
+define void @simplify_resume(i8* %src, i8* %dst) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  %save = call token @llvm.coro.save(i8* %hdl)
+  ; memcpy intrinsics should not prevent simplification.
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i1 false)
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %bres = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bres(i8* %hdl)
+  %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %pre.cleanup]
+resume:
+  call void @print(i32 0)
+  br label %cleanup
+
+pre.cleanup:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+}
+
+; SimplifySuspendPoint will detect that the coroutine destroys itself and will
+; replace the suspend with a jump to the %cleanup label, turning it into a
+; no-suspend coroutine.
+;
+; CHECK-LABEL: define void @simplify_destroy(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    alloca
+; CHECK-NEXT:    bitcast
+; CHECK-NEXT:    call void @print(i32 1)
+; CHECK-NEXT:    ret void
+;
+define void @simplify_destroy() "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  %save = call token @llvm.coro.save(i8* %hdl)
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %bcast = bitcast i8* %subfn to void (i8*)*
+  invoke fastcc void %bcast(i8* %hdl) to label %real_susp unwind label %lpad
+
+real_susp:
+  %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %pre.cleanup]
+resume:
+  call void @print(i32 0)
+  br label %cleanup
+
+pre.cleanup:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+lpad:
+  %lpval = landingpad { i8*, i32 }
+     cleanup
+
+  call void @print(i32 2)
+  resume { i8*, i32 } %lpval
+}
+
+; SimplifySuspendPoint will detect that coro.resume resumes itself and will
+; replace the suspend with a jump to the %resume label, turning it into a
+; no-suspend coroutine.
+;
+; CHECK-LABEL: define void @simplify_resume_with_inlined_if(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    alloca
+; CHECK-NEXT:    bitcast
+; CHECK-NEXT:    br i1
+; CHECK:         call void @print(i32 0)
+; CHECK-NEXT:    ret void
+;
+define void @simplify_resume_with_inlined_if(i8* %src, i8* %dst, i1 %cond) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  %save = call token @llvm.coro.save(i8* %hdl)
+  br i1 %cond, label %if.then, label %if.else
+if.then:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i1 false)
+  br label %if.end
+if.else:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %dst, i64 1, i1 false)
+  br label %if.end
+if.end:
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %bres = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bres(i8* %hdl)
+  %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %pre.cleanup]
+resume:
+  call void @print(i32 0)
+  br label %cleanup
+
+pre.cleanup:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+}
+
+
+
+; SimplifySuspendPoint won't be able to simplify if it detects that there are
+; other calls between coro.save and coro.suspend. They could potentially call
+; resume or destroy, so we should not simplify this suspend point.
+;
+; CHECK-LABEL: define void @cannot_simplify_other_calls(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:     llvm.coro.id
+
+define void @cannot_simplify_other_calls() "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  %save = call token @llvm.coro.save(i8* %hdl)
+  br label %body1
+
+body1:
+  call void @foo()
+  br label %body2
+
+body2:
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %bcast = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bcast(i8* %hdl)
+  %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %pre.cleanup]
+resume:
+  call void @print(i32 0)
+  br label %cleanup
+
+pre.cleanup:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+}
+
+; SimplifySuspendPoint won't be able to simplify if it detects that there are
+; other calls between coro.save and coro.suspend. They could potentially call
+; resume or destroy, so we should not simplify this suspend point.
+;
+; CHECK-LABEL: define void @cannot_simplify_calls_in_terminator(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:     llvm.coro.id
+
+define void @cannot_simplify_calls_in_terminator() "coroutine.presplit"="1" personality i32 0 {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  %save = call token @llvm.coro.save(i8* %hdl)
+  invoke void @foo() to label %resume_cont unwind label %lpad
+resume_cont:
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %bcast = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bcast(i8* %hdl)
+  %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %pre.cleanup]
+resume:
+  call void @print(i32 0)
+  br label %cleanup
+
+pre.cleanup:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+lpad:
+  %lpval = landingpad { i8*, i32 }
+     cleanup
+
+  call void @print(i32 2)
+  resume { i8*, i32 } %lpval
+}
+
+; SimplifySuspendPoint won't be able to simplify if it detects that resume or
+; destroy does not immediately precede coro.suspend.
+;
+; CHECK-LABEL: define void @cannot_simplify_not_last_instr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:     llvm.coro.id
+
+define void @cannot_simplify_not_last_instr(i8* %dst, i8* %src) "coroutine.presplit"="1" {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %body
+body:
+  %save = call token @llvm.coro.save(i8* %hdl)
+  %subfn = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %bcast = bitcast i8* %subfn to void (i8*)*
+  call fastcc void %bcast(i8* %hdl)
+  ; The memcpy separates the destroy call from the suspend, therefore we cannot simplify.
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i1 false)
+  %0 = call i8 @llvm.coro.suspend(token %save, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %resume
+                                i8 1, label %pre.cleanup]
+resume:
+  call void @print(i32 0)
+  br label %cleanup
+
+pre.cleanup:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret void
+}
+
+declare i8* @malloc(i32)
+declare void @free(i8*)
+declare void @print(i32)
+declare void @foo()
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.begin(token, i8*)
+declare token @llvm.coro.save(i8* %hdl)
+declare i8 @llvm.coro.suspend(token, i1)
+declare i8* @llvm.coro.free(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare i8* @llvm.coro.subfn.addr(i8*, i8)
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)

Added: llvm/trunk/test/Transforms/Coroutines/phi-coro-end.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/phi-coro-end.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/phi-coro-end.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/phi-coro-end.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,48 @@
+; Verify that we correctly handle suspend when the coro.end block contains a phi
+; RUN: opt < %s -O2 -enable-coroutines -S | FileCheck %s
+
+define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %cleanup i8 1, label %cleanup]
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+
+suspend:
+  %r = phi i32 [%n, %entry], [1, %cleanup]
+  call i1 @llvm.coro.end(i8* %hdl, i1 false)  
+  call void @print(i32 %r)
+  ret i8* %hdl
+}
+
+; CHECK-LABEL: @main
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  call void @llvm.coro.resume(i8* %hdl)
+  ret i32 0
+;CHECK: call void @print(i32 4)
+;CHECK: ret i32 0
+}
+
+declare i8* @llvm.coro.alloc()
+declare i32 @llvm.coro.size.i32()
+declare i8* @llvm.coro.free(token, i8*)
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+  
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1) 
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/restart-trigger.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,43 @@
+; Verifies that the restart trigger forces the IPO pipeline to restart and that
+; the same coroutine is looked at by the CoroSplit pass twice.
+; REQUIRES: asserts
+; RUN: opt < %s -S -O0 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
+; RUN: opt < %s -S -O1 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
+
+; CHECK:      CoroSplit: Processing coroutine 'f' state: 0
+; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1
+
+define void @f() {
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  call void @print(i32 0)
+  %s1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %s1, label %suspend [i8 0, label %resume 
+                                 i8 1, label %cleanup]
+resume:
+  call void @print(i32 1)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret void  
+}
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+declare i1 @llvm.coro.end(i8*, i1) 
+
+declare noalias i8* @malloc(i32)
+declare void @print(i32)
+declare void @free(i8*)

Added: llvm/trunk/test/Transforms/Coroutines/smoketest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Coroutines/smoketest.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Coroutines/smoketest.ll (added)
+++ llvm/trunk/test/Transforms/Coroutines/smoketest.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; Test that all coroutine passes run in the correct order at all optimization
+; levels and that -enable-coroutines adds the coroutine passes to the pipeline.
+;
+; RUN: opt < %s -disable-output -enable-coroutines -debug-pass=Arguments -O0 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-output -enable-coroutines -debug-pass=Arguments -O1 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-output -enable-coroutines -debug-pass=Arguments -O2 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-output -enable-coroutines -debug-pass=Arguments -O3 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-output -enable-coroutines -debug-pass=Arguments \
+; RUN:     -coro-early -coro-split -coro-elide -coro-cleanup 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-output -debug-pass=Arguments 2>&1 \
+; RUN:     | FileCheck %s -check-prefix=NOCORO
+
+; CHECK: coro-early
+; CHECK: coro-split
+; CHECK: coro-elide
+; CHECK: coro-cleanup
+
+; NOCORO-NOT: coro-early
+; NOCORO-NOT: coro-split
+; NOCORO-NOT: coro-elide
+; NOCORO-NOT: coro-cleanup
+
+define void @foo() {
+  ret void
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt -S < %s -correlated-propagation | FileCheck %s
+
+; CHECK-LABEL: @test(
+define i16 @test(i32 %a, i1 %b) {
+entry:
+  %c = icmp eq i32 %a, 0
+  br i1 %c, label %left, label %right
+
+right:
+  %d = trunc i32 %a to i1
+  br label %merge
+
+left:
+  br i1 %b, label %merge, label %other
+
+other:
+  ret i16 23
+
+merge:
+  %f = phi i1 [%b, %left], [%d, %right]
+; CHECK: select i1 %f, i16 1, i16 0 
+  %h = select i1 %f, i16 1, i16 0 
+; CHECK: ret i16 %h
+  ret i16 %h
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/2010-09-26-MergeConstantRange.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/2010-09-26-MergeConstantRange.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/2010-09-26-MergeConstantRange.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/2010-09-26-MergeConstantRange.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,82 @@
+; RUN: opt < %s -jump-threading -correlated-propagation
+
+%struct.S2 = type {}
+
+ at g_128 = external global %struct.S2, align 1
+ at g_106 = external global i16, align 2
+
+define void @int328(i16 signext %p_82) noreturn nounwind ssp {
+entry:
+  %tobool3 = icmp eq i16 %p_82, 0
+  br label %for.cond.outer
+
+for.cond.outer:                                   ; preds = %for.cond.loopexit, %entry
+  br label %for.cond
+
+for.cond.loopexit:                                ; preds = %bb.nph, %for.cond9.preheader
+  br label %for.cond.outer
+
+for.cond.loopexit4.us-lcssa:                      ; preds = %if.then
+  br label %for.cond.loopexit4
+
+for.cond.loopexit4:                               ; preds = %for.cond.loopexit4.us-lcssa.us, %for.cond.loopexit4.us-lcssa
+  br label %for.cond.backedge
+
+for.cond:                                         ; preds = %for.cond.backedge, %for.cond.outer
+  br i1 %tobool3, label %for.cond.split.us, label %for.cond.for.cond.split_crit_edge
+
+for.cond.for.cond.split_crit_edge:                ; preds = %for.cond
+  br label %lbl_133
+
+for.cond.split.us:                                ; preds = %for.cond
+  br label %lbl_133.us
+
+lbl_133.us:                                       ; preds = %lbl_134.us, %for.cond.split.us
+  br i1 undef, label %if.else14.us-lcssa.us, label %if.then.us
+
+lbl_134.us:                                       ; preds = %if.then.us
+  br i1 icmp eq (i16 ptrtoint (%struct.S2* @g_128 to i16), i16 0), label %for.cond9.preheader.us-lcssa.us, label %lbl_133.us
+
+if.then.us:                                       ; preds = %lbl_133.us
+  br i1 true, label %for.cond.loopexit4.us-lcssa.us, label %lbl_134.us
+
+if.else14.us-lcssa.us:                            ; preds = %lbl_133.us
+  br label %if.else14
+
+for.cond9.preheader.us-lcssa.us:                  ; preds = %lbl_134.us
+  br label %for.cond9.preheader
+
+for.cond.loopexit4.us-lcssa.us:                   ; preds = %if.then.us
+  br label %for.cond.loopexit4
+
+lbl_133:                                          ; preds = %lbl_134, %for.cond.for.cond.split_crit_edge
+  %l_109.0 = phi i16 [ 0, %for.cond.for.cond.split_crit_edge ], [ ptrtoint (%struct.S2* @g_128 to i16), %lbl_134 ]
+  %tobool = icmp eq i32 undef, 0
+  br i1 %tobool, label %if.else14.us-lcssa, label %if.then
+
+if.then:                                          ; preds = %lbl_133
+  br i1 false, label %for.cond.loopexit4.us-lcssa, label %lbl_134
+
+lbl_134:                                          ; preds = %if.then
+  br i1 icmp eq (i16 ptrtoint (%struct.S2* @g_128 to i16), i16 0), label %for.cond9.preheader.us-lcssa, label %lbl_133
+
+for.cond9.preheader.us-lcssa:                     ; preds = %lbl_134
+  br label %for.cond9.preheader
+
+for.cond9.preheader:                              ; preds = %for.cond9.preheader.us-lcssa, %for.cond9.preheader.us-lcssa.us
+  br i1 undef, label %bb.nph, label %for.cond.loopexit
+
+bb.nph:                                           ; preds = %for.cond9.preheader
+  br label %for.cond.loopexit
+
+if.else14.us-lcssa:                               ; preds = %lbl_133
+  br label %if.else14
+
+if.else14:                                        ; preds = %if.else14.us-lcssa, %if.else14.us-lcssa.us
+  %l_109.0.lcssa = phi i16 [ %l_109.0, %if.else14.us-lcssa ], [ 0, %if.else14.us-lcssa.us ]
+  store i16 undef, i16* @g_106, align 2
+  br label %for.cond.backedge
+
+for.cond.backedge:                                ; preds = %if.else14, %for.cond.loopexit4
+  br label %for.cond
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/add.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/add.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/add.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,332 @@
+; RUN: opt < %s -correlated-propagation -cvp-dont-process-adds=false -S | FileCheck %s
+
+; CHECK-LABEL: @test0(
+define void @test0(i32 %a) {
+entry:
+  %cmp = icmp slt i32 %a, 100
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test1(
+define void @test1(i32 %a) {
+entry:
+  %cmp = icmp ult i32 %a, 100
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add nuw nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
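For reference, a rough C-level analogue of the range reasoning behind @test0 and @test1
(illustrative only; the function names and asserts are mine, not part of the test):

#include <limits.h>
#include <assert.h>

int signed_case(int a) {             /* @test0: a s< 100                       */
  if (a < 100) {
    assert(a <= 99);                 /* so a + 1 <= 100 < INT_MAX              */
    return a + 1;                    /* cannot signed-wrap -> nsw              */
  }
  return 0;
}

unsigned unsigned_case(unsigned a) { /* @test1: a u< 100                       */
  if (a < 100u) {
    assert(a <= 99u);                /* a + 1 stays in [1, 100]                */
    return a + 1;                    /* no unsigned or signed wrap -> nuw nsw  */
  }
  return 0;
}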
+
+; CHECK-LABEL: @test2(
+define void @test2(i32 %a) {
+entry:
+  %cmp = icmp ult i32 %a, -1
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add nuw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test3(
+define void @test3(i32 %a) {
+entry:
+  %cmp = icmp ule i32 %a, -1
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test4(
+define void @test4(i32 %a) {
+entry:
+  %cmp = icmp slt i32 %a, 2147483647
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test5(
+define void @test5(i32 %a) {
+entry:
+  %cmp = icmp sle i32 %a, 2147483647
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check for a corner case where an integer value is represented with a constant
+; LVILatticeValue instead of constantrange. Check that we don't fail with an
+; assertion in this case.
+ at b = global i32 0, align 4
+define void @test6(i32 %a) {
+bb:
+  %add = add i32 %a, ptrtoint (i32* @b to i32)
+  ret void
+}
+
+; Check that we can gather information for conditions in the form of
+;   and ( i s< 100, Unknown )
+; CHECK-LABEL: @test7(
+define void @test7(i32 %a, i1 %flag) {
+entry:
+  %cmp.1 = icmp slt i32 %a, 100
+  %cmp = and i1 %cmp.1, %flag
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can gather information for conditions in the form of
+;   and ( i s< 100, i s> 0 )
+; CHECK-LABEL: @test8(
+define void @test8(i32 %a) {
+entry:
+  %cmp.1 = icmp slt i32 %a, 100
+  %cmp.2 = icmp sgt i32 %a, 0
+  %cmp = and i1 %cmp.1, %cmp.2
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add nuw nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that for conditions in the form of cond1 && cond2 we don't mistakenly
+; assume that !cond1 && !cond2 holds on the false path.
+; CHECK-LABEL: @test8_neg(
+define void @test8_neg(i32 %a) {
+entry:
+  %cmp.1 = icmp sge i32 %a, 100
+  %cmp.2 = icmp sle i32 %a, 0
+  %cmp = and i1 %cmp.1, %cmp.2
+  br i1 %cmp, label %exit, label %bb
+
+bb:
+; CHECK: %add = add i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
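A C-level sketch of the difference between @test8 and @test8_neg (illustrative only;
names are mine):

int conjunction(int a) {        /* @test8 */
  if (a < 100 && a > 0)         /* both facts hold: a in [1, 99]                   */
    return a + 1;               /* -> nuw nsw                                      */
  return 0;
}

int conjunction_neg(int a) {    /* @test8_neg */
  if (a >= 100 && a <= 0)
    return 0;
  /* On this path we only know that at least one conjunct is false, */
  /* not that 0 < a < 100, so the add keeps no wrap flags.           */
  return a + 1;
}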
+
+; Check that we can gather information for conditions in the form of
+;   and ( i s< 100, and ( i s> 0, Unknown ) )
+; CHECK-LABEL: @test9(
+define void @test9(i32 %a, i1 %flag) {
+entry:
+  %cmp.1 = icmp slt i32 %a, 100
+  %cmp.2 = icmp sgt i32 %a, 0
+  %cmp.3 = and i1 %cmp.2, %flag
+  %cmp = and i1 %cmp.1, %cmp.3
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add nuw nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can gather information for conditions in the form of
+;   and ( i s< Unknown, ... )
+; CHECK-LABEL: @test10(
+define void @test10(i32 %a, i32 %b, i1 %flag) {
+entry:
+  %cmp.1 = icmp slt i32 %a, %b
+  %cmp = and i1 %cmp.1, %flag
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+ at limit = external global i32
+; CHECK-LABEL: @test11(
+define i32 @test11(i32* %p, i32 %i) {
+  %limit = load i32, i32* %p, !range !{i32 0, i32 2147483647}
+  %within.1 = icmp ugt i32 %limit, %i
+  %i.plus.7 = add i32 %i, 7
+  %within.2 = icmp ugt i32 %limit, %i.plus.7
+  %within = and i1 %within.1, %within.2
+  br i1 %within, label %then, label %else
+
+then:
+; CHECK: %i.plus.6 = add nuw nsw i32 %i, 6
+  %i.plus.6 = add i32 %i, 6
+  ret i32 %i.plus.6
+
+else:
+  ret i32 0
+}
+
+; Check that we can gather information for conditions in the form of
+;   or ( i s>= 100, Unknown )
+; CHECK-LABEL: @test12(
+define void @test12(i32 %a, i1 %flag) {
+entry:
+  %cmp.1 = icmp sge i32 %a, 100
+  %cmp = or i1 %cmp.1, %flag
+  br i1 %cmp, label %exit, label %bb
+
+bb:
+; CHECK: %add = add nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can gather information for conditions in the form of
+;   or ( i s>= 100, i s<= 0 )
+; CHECK-LABEL: @test13(
+define void @test13(i32 %a) {
+entry:
+  %cmp.1 = icmp sge i32 %a, 100
+  %cmp.2 = icmp sle i32 %a, 0
+  %cmp = or i1 %cmp.1, %cmp.2
+  br i1 %cmp, label %exit, label %bb
+
+bb:
+; CHECK: %add = add nuw nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that for conditions in the form of cond1 || cond2 we don't mistakenly
+; assume that both cond1 and cond2 hold on the true path.
+; CHECK-LABEL: @test13_neg(
+define void @test13_neg(i32 %a) {
+entry:
+  %cmp.1 = icmp slt i32 %a, 100
+  %cmp.2 = icmp sgt i32 %a, 0
+  %cmp = or i1 %cmp.1, %cmp.2
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can gather information for conditions in the form of
+;   or ( i s>= 100, or ( i s<= 0, Unknown ) )
+; CHECK-LABEL: @test14(
+define void @test14(i32 %a, i1 %flag) {
+entry:
+  %cmp.1 = icmp sge i32 %a, 100
+  %cmp.2 = icmp sle i32 %a, 0
+  %cmp.3 = or i1 %cmp.2, %flag
+  %cmp = or i1 %cmp.1, %cmp.3
+  br i1 %cmp, label %exit, label %bb
+
+bb:
+; CHECK: %add = add nuw nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Check that we can gather information for conditions in the form of
+;   or ( i s>= Unknown, ... )
+; CHECK-LABEL: @test15(
+define void @test15(i32 %a, i32 %b, i1 %flag) {
+entry:
+  %cmp.1 = icmp sge i32 %a, %b
+  %cmp = or i1 %cmp.1, %flag
+  br i1 %cmp, label %exit, label %bb
+
+bb:
+; CHECK: %add = add nsw i32 %a, 1
+  %add = add i32 %a, 1
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Single basic block loop.
+; Because the loop exit condition is SLT, we can annotate the iv add
+; (the iv.next def) with nsw.
+; CHECK-LABEL: @test16(
+define i32 @test16(i32* %n, i32* %a) {
+preheader:
+  br label %loop
+
+loop:
+; CHECK: %iv.next = add nsw i32 %iv, 1
+  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+  %acc = phi i32 [ 0, %preheader ], [ %acc.curr, %loop ]
+  %x = load atomic i32, i32* %a unordered, align 8
+  fence acquire
+  %acc.curr = add i32 %acc, %x
+  %iv.next = add i32 %iv, 1
+  %nval = load atomic i32, i32* %n unordered, align 8
+  %cmp = icmp slt i32 %iv.next, %nval
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret i32 %acc.curr
+}
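A rough C counterpart of @test16 (illustrative only; it drops the atomics and fence,
which only matter for the IR form):

int loop_sum(int *n, int *a) {
  int acc = 0;
  int iv = 0;
  do {
    acc += *a;
    /* Before the increment, iv is either 0 (first trip) or a value that  */
    /* satisfied iv < *n on the previous backedge, so iv is at most       */
    /* INT_MAX - 1 and iv + 1 can never signed-wrap -> the add gets nsw.  */
    iv = iv + 1;
  } while (iv < *n);
  return acc;
}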

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/alloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/alloca.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/alloca.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/alloca.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,49 @@
+; RUN: opt -S -correlated-propagation -debug-only=lazy-value-info <%s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; A shortcut in Correlated Value Propagation avoids querying Lazy Value Info
+; for %a.i and %tmp, because %a.i is defined by an alloca and %tmp is defined
+; by an alloca + bitcast, and the result of an alloca is known to be nonnull.
+;
+; CHECK-NOT: LVI Getting edge value   %a.i = alloca i64, align 8 at 'for.body'
+; CHECK-NOT: LVI Getting edge value   %tmp = bitcast i64* %a.i to i8* from 'for.cond' to 'for.body'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str = private unnamed_addr constant [8 x i8] c"a = %l\0A\00", align 1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare void @hoo(i64*)
+
+declare i32 @printf(i8* nocapture readonly, ...)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define void @goo(i32 %N, i64* %b) {
+entry:
+  %a.i = alloca i64, align 8
+  %tmp = bitcast i64* %a.i to i8*
+  %c = getelementptr inbounds i64, i64* %b, i64 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %N
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp)
+  call void @hoo(i64* %a.i)
+  call void @hoo(i64* %c)
+  %tmp1 = load volatile i64, i64* %a.i, align 8
+  %call.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i64 %tmp1)
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
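Roughly the C shape of @goo (illustrative only; the printf and lifetime markers are
omitted and the names follow the IR loosely):

void hoo(long *);

void goo_sketch(int N, long *b) {
  long a;                   /* the IR alloca: &a is trivially nonnull, so CVP   */
                            /* needs no Lazy Value Info query to prove it       */
  for (int i = 0; i < N; ++i) {
    hoo(&a);
    hoo(&b[0]);
  }
}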

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/ashr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/ashr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/ashr.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/ashr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,105 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+; Check that debug locations are preserved. For more info see:
+;   https://llvm.org/docs/SourceLevelDebugging.html#fixing-errors
+; RUN: opt < %s -enable-debugify -correlated-propagation -S 2>&1 | \
+; RUN:   FileCheck %s -check-prefix=DEBUG
+; DEBUG: CheckModuleDebugify: PASS
+
+; CHECK-LABEL: @test1
+define void @test1(i32 %n) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %a = phi i32 [ %n, %entry ], [ %shr, %for.body ]
+  %cmp = icmp sgt i32 %a, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+; CHECK: lshr i32 %a, 5
+  %shr = ashr i32 %a, 5
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+;; Negative test to show the transform doesn't happen unless n > 0.
+; CHECK-LABEL: @test2
+define void @test2(i32 %n) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %a = phi i32 [ %n, %entry ], [ %shr, %for.body ]
+  %cmp = icmp sgt i32 %a, -2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+; CHECK: ashr i32 %a, 2
+  %shr = ashr i32 %a, 2
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+;; Non looping test case.
+; CHECK-LABEL: @test3
+define void @test3(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: lshr exact i32 %n, 4
+  %shr = ashr exact i32 %n, 4
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Looping case where the loop has exactly one block.
+; At the point of the ashr, we know that the operand is always greater than 0
+; because of the guard before it, so we can transform it to an lshr.
+declare void @llvm.experimental.guard(i1,...)
+; CHECK-LABEL: @test4
+define void @test4(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: lshr i32 %a, 1
+  %a = phi i32 [ %n, %entry ], [ %shr, %loop ]
+  %cond = icmp sgt i32 %a, 2
+  call void(i1,...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  %shr = ashr i32 %a, 1
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret void
+}
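The guard and assume cases rest on the same fact as @test3: for a value known to be
positive the sign bit is zero, so an arithmetic and a logical right shift agree. A
small C illustration (mine, not part of the test):

unsigned shifts_agree(int n) {
  if (n > 0) {
    /* sign bit is 0, so ashr and lshr produce the same bits */
    return (unsigned)(n >> 4) == ((unsigned)n >> 4);   /* always 1 here */
  }
  return 0;
}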
+
+; Same as @test4, but with an assume instead of a guard.
+declare void @llvm.assume(i1)
+; CHECK-LABEL: @test5
+define void @test5(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: lshr i32 %a, 1
+  %a = phi i32 [ %n, %entry ], [ %shr, %loop ]
+  %cond = icmp sgt i32 %a, 4
+  call void @llvm.assume(i1 %cond)
+  %shr = ashr i32 %a, 1
+  %loopcond = icmp sgt i32 %shr, 8
+  br i1 %loopcond, label %loop, label %exit
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/basic.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,725 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+; PR2581
+
+define i32 @test1(i1 %C) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[EXIT:%.*]], label [[BODY:%.*]]
+; CHECK:       body:
+; CHECK-NEXT:    ret i32 11
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+  br i1 %C, label %exit, label %body
+
+body:           ; preds = %0
+  %A = select i1 %C, i32 10, i32 11
+  ret i32 %A
+
+exit:           ; preds = %0
+  ret i32 10
+}
+
+; PR4420
+declare i1 @ext()
+define i1 @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = tail call i1 @ext()
+; CHECK-NEXT:    br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[COND2:%.*]] = tail call i1 @ext()
+; CHECK-NEXT:    br i1 [[COND2]], label [[BB3:%.*]], label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       bb3:
+; CHECK-NEXT:    [[RES:%.*]] = tail call i1 @ext()
+; CHECK-NEXT:    ret i1 [[RES]]
+;
+entry:
+  %cond = tail call i1 @ext()
+  br i1 %cond, label %bb1, label %bb2
+
+bb1:
+  %cond2 = tail call i1 @ext()
+  br i1 %cond2, label %bb3, label %bb2
+
+bb2:
+  %cond_merge = phi i1 [ %cond, %entry ], [ false, %bb1 ]
+  ret i1 %cond_merge
+
+bb3:
+  %res = tail call i1 @ext()
+  ret i1 %res
+}
+
+; PR4855
+ at gv = internal constant i8 7
+define i8 @test3(i8* %a) nounwind {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8* [[A:%.*]], @gv
+; CHECK-NEXT:    br i1 [[COND]], label [[BB2:%.*]], label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret i8 0
+; CHECK:       bb2:
+; CHECK-NEXT:    [[SHOULD_BE_CONST:%.*]] = load i8, i8* @gv
+; CHECK-NEXT:    ret i8 [[SHOULD_BE_CONST]]
+;
+entry:
+  %cond = icmp eq i8* %a, @gv
+  br i1 %cond, label %bb2, label %bb
+
+bb:
+  ret i8 0
+
+bb2:
+  %should_be_const = load i8, i8* %a
+  ret i8 %should_be_const
+}
+
+; PR1757
+define i32 @test4(i32) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  EntryBlock:
+; CHECK-NEXT:    [[DOTDEMORGAN:%.*]] = icmp sgt i32 [[TMP0:%.*]], 2
+; CHECK-NEXT:    br i1 [[DOTDEMORGAN]], label [[GREATERTHANTWO:%.*]], label [[LESSTHANOREQUALTOTWO:%.*]]
+; CHECK:       GreaterThanTwo:
+; CHECK-NEXT:    br i1 false, label [[IMPOSSIBLE:%.*]], label [[NOTTWOANDGREATERTHANTWO:%.*]]
+; CHECK:       NotTwoAndGreaterThanTwo:
+; CHECK-NEXT:    ret i32 2
+; CHECK:       Impossible:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       LessThanOrEqualToTwo:
+; CHECK-NEXT:    ret i32 0
+;
+EntryBlock:
+  %.demorgan = icmp sgt i32 %0, 2
+  br i1 %.demorgan, label %GreaterThanTwo, label %LessThanOrEqualToTwo
+
+GreaterThanTwo:
+  icmp eq i32 %0, 2
+  br i1 %1, label %Impossible, label %NotTwoAndGreaterThanTwo
+
+NotTwoAndGreaterThanTwo:
+  ret i32 2
+
+Impossible:
+  ret i32 1
+
+LessThanOrEqualToTwo:
+  ret i32 0
+}
+
+declare i32* @f(i32*)
+define void @test5(i32* %x, i32* %y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PRE:%.*]] = icmp eq i32* [[X:%.*]], null
+; CHECK-NEXT:    br i1 [[PRE]], label [[RETURN:%.*]], label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32* [ [[F:%.*]], [[LOOP]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[F]] = tail call i32* @f(i32* [[PHI]])
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32* [[F]], [[Y:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32* [[F]], i32* null
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32* [[SEL]], null
+; CHECK-NEXT:    br i1 [[CMP2]], label [[RETURN]], label [[LOOP]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %pre = icmp eq i32* %x, null
+  br i1 %pre, label %return, label %loop
+
+loop:
+  %phi = phi i32* [ %sel, %loop ], [ %x, %entry ]
+  %f = tail call i32* @f(i32* %phi)
+  %cmp1 = icmp ne i32* %f, %y
+  %sel = select i1 %cmp1, i32* %f, i32* null
+  %cmp2 = icmp eq i32* %sel, null
+  br i1 %cmp2, label %return, label %loop
+
+return:
+  ret void
+}
+
+define i32 @switch1(i32 %s) {
+; CHECK-LABEL: @switch1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[NEGATIVE:%.*]], label [[OUT:%.*]]
+; CHECK:       negative:
+; CHECK-NEXT:    switch i32 [[S]], label [[OUT]] [
+; CHECK-NEXT:    i32 -2, label [[NEXT:%.*]]
+; CHECK-NEXT:    i32 -1, label [[NEXT]]
+; CHECK-NEXT:    ]
+; CHECK:       out:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ -1, [[NEGATIVE]] ]
+; CHECK-NEXT:    ret i32 [[P]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %cmp = icmp slt i32 %s, 0
+  br i1 %cmp, label %negative, label %out
+
+negative:
+  switch i32 %s, label %out [
+  i32 0, label %out
+  i32 1, label %out
+  i32 -1, label %next
+  i32 -2, label %next
+  i32 2, label %out
+  i32 3, label %out
+  ]
+
+out:
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %negative ], [ 0, %negative ]
+  ret i32 %q
+}
+
+define i32 @switch2(i32 %s) {
+; CHECK-LABEL: @switch2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[POSITIVE:%.*]], label [[OUT:%.*]]
+; CHECK:       positive:
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ 1, [[POSITIVE]] ]
+; CHECK-NEXT:    ret i32 [[P]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+  switch i32 %s, label %out [
+  i32 0, label %out
+  i32 -1, label %next
+  i32 -2, label %next
+  ]
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+define i32 @switch3(i32 %s) {
+; CHECK-LABEL: @switch3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[POSITIVE:%.*]], label [[OUT:%.*]]
+; CHECK:       positive:
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ 1, [[POSITIVE]] ]
+; CHECK-NEXT:    ret i32 [[P]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+  switch i32 %s, label %out [
+  i32 -1, label %out
+  i32 -2, label %next
+  i32 -3, label %next
+  ]
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+define void @switch4(i32 %s) {
+; CHECK-LABEL: @switch4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[ZERO:%.*]], label [[OUT:%.*]]
+; CHECK:       zero:
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       out:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp = icmp eq i32 %s, 0
+  br i1 %cmp, label %zero, label %out
+
+zero:
+  switch i32 %s, label %out [
+  i32 0, label %next
+  i32 1, label %out
+  i32 -1, label %out
+  ]
+
+out:
+  ret void
+
+next:
+  ret void
+}
+
+define i1 @arg_attribute(i8* nonnull %a) {
+; CHECK-LABEL: @arg_attribute(
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = icmp eq i8* %a, null
+  br label %exit
+
+exit:
+  ret i1 %cmp
+}
+
+declare nonnull i8* @return_nonnull()
+define i1 @call_attribute() {
+; CHECK-LABEL: @call_attribute(
+; CHECK-NEXT:    [[A:%.*]] = call i8* @return_nonnull()
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[A]], null
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 false
+;
+  %a = call i8* @return_nonnull()
+  %cmp = icmp eq i8* %a, null
+  br label %exit
+
+exit:
+  ret i1 %cmp
+}
+
+define i1 @umin(i32 %a, i32 %b) {
+; CHECK-LABEL: @umin(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]]
+; CHECK:       a_guard:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[B:%.*]], 20
+; CHECK-NEXT:    br i1 [[CMP2]], label [[B_GUARD:%.*]], label [[OUT]]
+; CHECK:       b_guard:
+; CHECK-NEXT:    [[SEL_CMP:%.*]] = icmp ult i32 [[A]], [[B]]
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[SEL_CMP]], i32 [[A]], i32 [[B]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[MIN]], 7
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %cmp = icmp ult i32 %a, 5
+  br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+  %cmp2 = icmp ult i32 %b, 20
+  br i1 %cmp2, label %b_guard, label %out
+
+b_guard:
+  %sel_cmp = icmp ult i32 %a, %b
+  %min = select i1 %sel_cmp, i32 %a, i32 %b
+  %res = icmp eq i32 %min, 7
+  br label %next
+next:
+  ret i1 %res
+out:
+  ret i1 false
+}
+
+define i1 @smin(i32 %a, i32 %b) {
+; CHECK-LABEL: @smin(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]]
+; CHECK:       a_guard:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[B:%.*]], 20
+; CHECK-NEXT:    br i1 [[CMP2]], label [[B_GUARD:%.*]], label [[OUT]]
+; CHECK:       b_guard:
+; CHECK-NEXT:    [[SEL_CMP:%.*]] = icmp sle i32 [[A]], [[B]]
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[SEL_CMP]], i32 [[A]], i32 [[B]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[MIN]], 7
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %cmp = icmp ult i32 %a, 5
+  br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+  %cmp2 = icmp ult i32 %b, 20
+  br i1 %cmp2, label %b_guard, label %out
+
+b_guard:
+  %sel_cmp = icmp sle i32 %a, %b
+  %min = select i1 %sel_cmp, i32 %a, i32 %b
+  %res = icmp eq i32 %min, 7
+  br label %next
+next:
+  ret i1 %res
+out:
+  ret i1 false
+}
+
+define i1 @smax(i32 %a, i32 %b) {
+; CHECK-LABEL: @smax(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]]
+; CHECK:       a_guard:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[B:%.*]], 20
+; CHECK-NEXT:    br i1 [[CMP2]], label [[B_GUARD:%.*]], label [[OUT]]
+; CHECK:       b_guard:
+; CHECK-NEXT:    [[SEL_CMP:%.*]] = icmp sge i32 [[A]], [[B]]
+; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[SEL_CMP]], i32 [[A]], i32 [[B]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[MAX]], 7
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+  %cmp2 = icmp sgt i32 %b, 20
+  br i1 %cmp2, label %b_guard, label %out
+
+b_guard:
+  %sel_cmp = icmp sge i32 %a, %b
+  %max = select i1 %sel_cmp, i32 %a, i32 %b
+  %res = icmp eq i32 %max, 7
+  br label %next
+next:
+  ret i1 %res
+out:
+  ret i1 false
+}
+
+define i1 @umax(i32 %a, i32 %b) {
+; CHECK-LABEL: @umax(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]]
+; CHECK:       a_guard:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[B:%.*]], 20
+; CHECK-NEXT:    br i1 [[CMP2]], label [[B_GUARD:%.*]], label [[OUT]]
+; CHECK:       b_guard:
+; CHECK-NEXT:    [[SEL_CMP:%.*]] = icmp uge i32 [[A]], [[B]]
+; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[SEL_CMP]], i32 [[A]], i32 [[B]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[MAX]], 7
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+  %cmp2 = icmp sgt i32 %b, 20
+  br i1 %cmp2, label %b_guard, label %out
+
+b_guard:
+  %sel_cmp = icmp uge i32 %a, %b
+  %max = select i1 %sel_cmp, i32 %a, i32 %b
+  %res = icmp eq i32 %max, 7
+  br label %next
+next:
+  ret i1 %res
+out:
+  ret i1 false
+}
+
+define i1 @clamp_low1(i32 %a) {
+; CHECK-LABEL: @clamp_low1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]]
+; CHECK:       a_guard:
+; CHECK-NEXT:    [[SEL_CMP:%.*]] = icmp eq i32 [[A]], 5
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A]], -1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[SEL_CMP]], i32 5, i32 [[A]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[SEL]], 4
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %cmp = icmp sge i32 %a, 5
+  br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+  %sel_cmp = icmp eq i32 %a, 5
+  %add = add i32 %a, -1
+  %sel = select i1 %sel_cmp, i32 5, i32 %a
+  %res = icmp eq i32 %sel, 4
+  br label %next
+next:
+  ret i1 %res
+out:
+  ret i1 false
+}
+
+define i1 @clamp_low2(i32 %a) {
+; CHECK-LABEL: @clamp_low2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]]
+; CHECK:       a_guard:
+; CHECK-NEXT:    [[SEL_CMP:%.*]] = icmp ne i32 [[A]], 5
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A]], -1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[SEL_CMP]], i32 [[A]], i32 5
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[SEL]], 4
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %cmp = icmp sge i32 %a, 5
+  br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+  %sel_cmp = icmp ne i32 %a, 5
+  %add = add i32 %a, -1
+  %sel = select i1 %sel_cmp, i32 %a, i32 5
+  %res = icmp eq i32 %sel, 4
+  br label %next
+next:
+  ret i1 %res
+out:
+  ret i1 false
+}
+
+define i1 @clamp_high1(i32 %a) {
+; CHECK-LABEL: @clamp_high1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]]
+; CHECK:       a_guard:
+; CHECK-NEXT:    [[SEL_CMP:%.*]] = icmp eq i32 [[A]], 5
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[SEL_CMP]], i32 5, i32 [[A]]
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[SEL]], 6
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %cmp = icmp sle i32 %a, 5
+  br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+  %sel_cmp = icmp eq i32 %a, 5
+  %add = add i32 %a, 1
+  %sel = select i1 %sel_cmp, i32 5, i32 %a
+  %res = icmp eq i32 %sel, 6
+  br label %next
+next:
+  ret i1 %res
+out:
+  ret i1 false
+}
+
+define i1 @clamp_high2(i32 %a) {
+; CHECK-LABEL: @clamp_high2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]]
+; CHECK:       a_guard:
+; CHECK-NEXT:    [[SEL_CMP:%.*]] = icmp ne i32 [[A]], 5
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A]], 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[SEL_CMP]], i32 [[A]], i32 5
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[SEL]], 6
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %cmp = icmp sle i32 %a, 5
+  br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+  %sel_cmp = icmp ne i32 %a, 5
+  %add = add i32 %a, 1
+  %sel = select i1 %sel_cmp, i32 %a, i32 5
+  %res = icmp eq i32 %sel, 6
+  br label %next
+next:
+  ret i1 %res
+out:
+  ret i1 false
+}
+
+; Just showing arbitrary constants work, not really a clamp
+define i1 @clamp_high3(i32 %a) {
+; CHECK-LABEL: @clamp_high3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]]
+; CHECK:       a_guard:
+; CHECK-NEXT:    [[SEL_CMP:%.*]] = icmp ne i32 [[A]], 5
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[A]], 100
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[SEL_CMP]], i32 [[A]], i32 5
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32 [[SEL]], 105
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %cmp = icmp sle i32 %a, 5
+  br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+  %sel_cmp = icmp ne i32 %a, 5
+  %add = add i32 %a, 100
+  %sel = select i1 %sel_cmp, i32 %a, i32 5
+  %res = icmp eq i32 %sel, 105
+  br label %next
+next:
+  ret i1 %res
+out:
+  ret i1 false
+}
+
+define i1 @zext_unknown(i8 %a) {
+; CHECK-LABEL: @zext_unknown(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A32:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[A32]], 256
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %a32 = zext i8 %a to i32
+  %cmp = icmp sle i32 %a32, 256
+  br label %exit
+exit:
+  ret i1 %cmp
+}
+
+define i1 @trunc_unknown(i32 %a) {
+; CHECK-LABEL: @trunc_unknown(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A8:%.*]] = trunc i32 [[A:%.*]] to i8
+; CHECK-NEXT:    [[A32:%.*]] = sext i8 [[A8]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[A32]], 128
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %a8 = trunc i32 %a to i8
+  %a32 = sext i8 %a8 to i32
+  %cmp = icmp sle i32 %a32, 128
+  br label %exit
+exit:
+  ret i1 %cmp
+}
+
+; TODO: missed optimization
+; Make sure we exercise non-integer inputs to unary operators (i.e. crash check).
+define i1 @bitcast_unknown(float %a) {
+; CHECK-LABEL: @bitcast_unknown(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A32:%.*]] = bitcast float [[A:%.*]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[A32]], 128
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %a32 = bitcast float %a to i32
+  %cmp = icmp sle i32 %a32, 128
+  br label %exit
+exit:
+  ret i1 %cmp
+}
+
+define i1 @bitcast_unknown2(i8* %p) {
+; CHECK-LABEL: @bitcast_unknown2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P64:%.*]] = ptrtoint i8* [[P:%.*]] to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i64 [[P64]], 128
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %p64 = ptrtoint i8* %p to i64
+  %cmp = icmp sle i64 %p64, 128
+  br label %exit
+exit:
+  ret i1 %cmp
+}
+
+
+define i1 @and_unknown(i32 %a) {
+; CHECK-LABEL: @and_unknown(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[A:%.*]], 128
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[AND]], 128
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %and = and i32 %a, 128
+  %cmp = icmp sle i32 %and, 128
+  br label %exit
+exit:
+  ret i1 %cmp
+}
+
+define i1 @lshr_unknown(i32 %a) {
+; CHECK-LABEL: @lshr_unknown(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AND:%.*]] = lshr i32 [[A:%.*]], 30
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[AND]], 128
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %and = lshr i32 %a, 30
+  %cmp = icmp sle i32 %and, 128
+  br label %exit
+exit:
+  ret i1 %cmp
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/conflict.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/conflict.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/conflict.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/conflict.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -correlated-propagation -S < %s | FileCheck %s
+
+; Checks that we don't crash on conflicting facts about a value
+; (i.e. unreachable code)
+
+; Test that we can handle conflicting edge facts
+
+define i8 @test(i8 %a) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP1]], label [[NEXT:%.*]], label [[EXIT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    br i1 false, label [[DEAD:%.*]], label [[EXIT]]
+; CHECK:       dead:
+; CHECK-NEXT:    ret i8 5
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 0
+;
+  %cmp1 = icmp eq i8 %a, 5
+  br i1 %cmp1, label %next, label %exit
+next:
+  %cmp2 = icmp eq i8 %a, 3
+  br i1 %cmp2, label %dead, label %exit
+dead:
+; NOTE: undef or 3 would be equally valid
+  ret i8 %a
+exit:
+  ret i8 0
+}
+
+declare void @llvm.assume(i1)
+
+; Test that we can handle conflicting assume vs edge facts
+
+define i8 @test2(i8 %a) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[A:%.*]], 5
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP1]])
+; CHECK-NEXT:    br i1 false, label [[DEAD:%.*]], label [[EXIT:%.*]]
+; CHECK:       dead:
+; CHECK-NEXT:    ret i8 5
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 0
+;
+  %cmp1 = icmp eq i8 %a, 5
+  call void @llvm.assume(i1 %cmp1)
+  %cmp2 = icmp eq i8 %a, 3
+  br i1 %cmp2, label %dead, label %exit
+dead:
+  ret i8 %a
+exit:
+  ret i8 0
+}
+
+define i8 @test3(i8 %a) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[A:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP1]], label [[DEAD:%.*]], label [[EXIT:%.*]]
+; CHECK:       dead:
+; CHECK-NEXT:    call void @llvm.assume(i1 false)
+; CHECK-NEXT:    ret i8 5
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 0
+;
+  %cmp1 = icmp eq i8 %a, 5
+  br i1 %cmp1, label %dead, label %exit
+dead:
+  %cmp2 = icmp eq i8 %a, 3
+  call void @llvm.assume(i1 %cmp2)
+  ret i8 %a
+exit:
+  ret i8 0
+}
+

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/crash.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,202 @@
+; RUN: opt < %s -correlated-propagation
+
+; PR8161
+define void @test1() nounwind ssp {
+entry:
+  br label %for.end
+
+for.cond.us.us:                                   ; preds = %for.cond.us.us
+  %cmp6.i.us.us = icmp sgt i32 1, 0
+  %lor.ext.i.us.us = zext i1 %cmp6.i.us.us to i32
+  %lor.ext.add.i.us.us = select i1 %cmp6.i.us.us, i32 %lor.ext.i.us.us, i32 undef
+  %conv.i.us.us = trunc i32 %lor.ext.add.i.us.us to i16
+  %sext.us.us = shl i16 %conv.i.us.us, 8
+  %conv6.us.us = ashr i16 %sext.us.us, 8
+  %and.us.us = and i16 %conv6.us.us, %and.us.us
+  br i1 false, label %for.end, label %for.cond.us.us
+
+for.end:                                          ; preds = %for.cond.us, %for.cond.us.us, %entry
+  ret void
+}
+
+; PR 8790
+define void @test2() nounwind ssp {
+entry:
+  br label %func_29.exit
+
+sdf.exit.i:
+  %l_44.1.mux.i = select i1 %tobool5.not.i, i8 %l_44.1.mux.i, i8 1
+  br label %srf.exit.i
+
+srf.exit.i:
+  %tobool5.not.i = icmp ne i8 undef, 0
+  br i1 %tobool5.not.i, label %sdf.exit.i, label %func_29.exit
+
+func_29.exit:
+  ret void
+}
+
+; PR13972
+define void @test3() nounwind {
+for.body:
+  br label %return
+
+for.cond.i:                                       ; preds = %if.else.i, %for.body.i
+  %e.2.i = phi i32 [ %e.2.i, %if.else.i ], [ -8, %for.body.i ]
+  br i1 undef, label %return, label %for.body.i
+
+for.body.i:                                       ; preds = %for.cond.i
+  switch i32 %e.2.i, label %for.cond3.i [
+    i32 -3, label %if.else.i
+    i32 0, label %for.cond.i
+  ]
+
+for.cond3.i:                                      ; preds = %for.cond3.i, %for.body.i
+  br label %for.cond3.i
+
+if.else.i:                                        ; preds = %for.body.i
+  br label %for.cond.i
+
+return:                                           ; preds = %for.cond.i, %for.body
+  ret void
+}
+
+define i1 @test4(i32 %int) {
+  %a0 = icmp ult i32 %int, 100
+  %a1 = and i1 %a0, %a0
+  %a2 = and i1 %a1, %a1
+  %a3 = and i1 %a2, %a2
+  %a4 = and i1 %a3, %a3
+  %a5 = and i1 %a4, %a4
+  %a6 = and i1 %a5, %a5
+  %a7 = and i1 %a6, %a6
+  %a8 = and i1 %a7, %a7
+  %a9 = and i1 %a8, %a8
+  %a10 = and i1 %a9, %a9
+  %a11 = and i1 %a10, %a10
+  %a12 = and i1 %a11, %a11
+  %a13 = and i1 %a12, %a12
+  %a14 = and i1 %a13, %a13
+  %a15 = and i1 %a14, %a14
+  %a16 = and i1 %a15, %a15
+  %a17 = and i1 %a16, %a16
+  %a18 = and i1 %a17, %a17
+  %a19 = and i1 %a18, %a18
+  %a20 = and i1 %a19, %a19
+  %a21 = and i1 %a20, %a20
+  %a22 = and i1 %a21, %a21
+  %a23 = and i1 %a22, %a22
+  %a24 = and i1 %a23, %a23
+  %a25 = and i1 %a24, %a24
+  %a26 = and i1 %a25, %a25
+  %a27 = and i1 %a26, %a26
+  %a28 = and i1 %a27, %a27
+  %a29 = and i1 %a28, %a28
+  %a30 = and i1 %a29, %a29
+  %a31 = and i1 %a30, %a30
+  %a32 = and i1 %a31, %a31
+  %a33 = and i1 %a32, %a32
+  %a34 = and i1 %a33, %a33
+  %a35 = and i1 %a34, %a34
+  %a36 = and i1 %a35, %a35
+  %a37 = and i1 %a36, %a36
+  %a38 = and i1 %a37, %a37
+  %a39 = and i1 %a38, %a38
+  %a40 = and i1 %a39, %a39
+  %a41 = and i1 %a40, %a40
+  %a42 = and i1 %a41, %a41
+  %a43 = and i1 %a42, %a42
+  %a44 = and i1 %a43, %a43
+  %a45 = and i1 %a44, %a44
+  %a46 = and i1 %a45, %a45
+  %a47 = and i1 %a46, %a46
+  %a48 = and i1 %a47, %a47
+  %a49 = and i1 %a48, %a48
+  %a50 = and i1 %a49, %a49
+  %a51 = and i1 %a50, %a50
+  %a52 = and i1 %a51, %a51
+  %a53 = and i1 %a52, %a52
+  %a54 = and i1 %a53, %a53
+  %a55 = and i1 %a54, %a54
+  %a56 = and i1 %a55, %a55
+  %a57 = and i1 %a56, %a56
+  %a58 = and i1 %a57, %a57
+  %a59 = and i1 %a58, %a58
+  %a60 = and i1 %a59, %a59
+  %a61 = and i1 %a60, %a60
+  %a62 = and i1 %a61, %a61
+  %a63 = and i1 %a62, %a62
+  %a64 = and i1 %a63, %a63
+  %a65 = and i1 %a64, %a64
+  %a66 = and i1 %a65, %a65
+  %a67 = and i1 %a66, %a66
+  %a68 = and i1 %a67, %a67
+  %a69 = and i1 %a68, %a68
+  %a70 = and i1 %a69, %a69
+  %a71 = and i1 %a70, %a70
+  %a72 = and i1 %a71, %a71
+  %a73 = and i1 %a72, %a72
+  %a74 = and i1 %a73, %a73
+  %a75 = and i1 %a74, %a74
+  %a76 = and i1 %a75, %a75
+  %a77 = and i1 %a76, %a76
+  %a78 = and i1 %a77, %a77
+  %a79 = and i1 %a78, %a78
+  %a80 = and i1 %a79, %a79
+  %a81 = and i1 %a80, %a80
+  %a82 = and i1 %a81, %a81
+  %a83 = and i1 %a82, %a82
+  %a84 = and i1 %a83, %a83
+  %a85 = and i1 %a84, %a84
+  %a86 = and i1 %a85, %a85
+  %a87 = and i1 %a86, %a86
+  %a88 = and i1 %a87, %a87
+  %a89 = and i1 %a88, %a88
+  %a90 = and i1 %a89, %a89
+  %a91 = and i1 %a90, %a90
+  %a92 = and i1 %a91, %a91
+  %a93 = and i1 %a92, %a92
+  %a94 = and i1 %a93, %a93
+  %a95 = and i1 %a94, %a94
+  %a96 = and i1 %a95, %a95
+  %a97 = and i1 %a96, %a96
+  %a98 = and i1 %a97, %a97
+  %a99 = and i1 %a98, %a98
+  %a100 = and i1 %a99, %a99
+  %a101 = and i1 %a100, %a100
+  %a102 = and i1 %a101, %a101
+  %a103 = and i1 %a102, %a102
+  %a104 = and i1 %a103, %a103
+  %a105 = and i1 %a104, %a104
+  %a106 = and i1 %a105, %a105
+  %a107 = and i1 %a106, %a106
+  %a108 = and i1 %a107, %a107
+  %a109 = and i1 %a108, %a108
+  %a110 = and i1 %a109, %a109
+  %a111 = and i1 %a110, %a110
+  %a112 = and i1 %a111, %a111
+  %a113 = and i1 %a112, %a112
+  %a114 = and i1 %a113, %a113
+  %a115 = and i1 %a114, %a114
+  %a116 = and i1 %a115, %a115
+  %a117 = and i1 %a116, %a116
+  %a118 = and i1 %a117, %a117
+  %a119 = and i1 %a118, %a118
+  %a120 = and i1 %a119, %a119
+  %a121 = and i1 %a120, %a120
+  %a122 = and i1 %a121, %a121
+  %a123 = and i1 %a122, %a122
+  %a124 = and i1 %a123, %a123
+  %a125 = and i1 %a124, %a124
+  %a126 = and i1 %a125, %a125
+  %a127 = and i1 %a126, %a126
+  %cond = and i1 %a127, %a127
+  br i1 %cond, label %then, label %else
+
+then:
+  %result = icmp eq i32 %int, 255
+  ret i1 %result
+
+else:
+  ret i1 false
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/deopt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/deopt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/deopt.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/deopt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -correlated-propagation -S < %s | FileCheck %s
+
+declare void @use()
+; This test requires a mix of context-sensitive refinement and analysis
+; of the originating IR pattern. Neither part is enough in isolation.
+define void @test1(i1 %c, i1 %c2) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i64 -1, i64 1
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[C2:%.*]], i64 [[SEL]], i64 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[SEL2]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[TAKEN:%.*]], label [[UNTAKEN:%.*]]
+; CHECK:       taken:
+; CHECK-NEXT:    call void @use() [ "deopt"(i64 1) ]
+; CHECK-NEXT:    ret void
+; CHECK:       untaken:
+; CHECK-NEXT:    ret void
+;
+  %sel = select i1 %c, i64 -1, i64 1
+  %sel2 = select i1 %c2, i64 %sel, i64 0
+  %cmp = icmp sgt i64 %sel2, 0
+  br i1 %cmp, label %taken, label %untaken
+taken:
+  call void @use() ["deopt" (i64 %sel2)]
+  ret void
+untaken:
+  ret void
+}
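A sketch of the value reasoning in @test1 (illustrative only): the selects can only
produce -1, 0 or 1, and the branch adds the fact sel2 > 0, which pins the deopt
operand to the constant 1.

long test1_sketch(int c, int c2) {
  long sel  = c  ? -1 : 1;   /* sel  is -1 or 1         */
  long sel2 = c2 ? sel : 0;  /* sel2 is -1, 0 or 1      */
  if (sel2 > 0) {
    return sel2;             /* only 1 is possible here */
  }
  return 0;
}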
+
+declare void @llvm.assume(i1)
+declare void @llvm.experimental.guard(i1,...)
+
+; Same as test1, but with assume not branch
+define void @test1_assume(i1 %c, i1 %c2) {
+; CHECK-LABEL: @test1_assume(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i64 -1, i64 1
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[C2:%.*]], i64 [[SEL]], i64 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[SEL2]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    call void @use() [ "deopt"(i64 1) ]
+; CHECK-NEXT:    ret void
+;
+  %sel = select i1 %c, i64 -1, i64 1
+  %sel2 = select i1 %c2, i64 %sel, i64 0
+  %cmp = icmp sgt i64 %sel2, 0
+  call void @llvm.assume(i1 %cmp)
+  call void @use() ["deopt" (i64 %sel2)]
+  ret void
+}
+
+; Same as test1, but with guard not branch
+define void @test1_guard(i1 %c, i1 %c2) {
+; CHECK-LABEL: @test1_guard(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i64 -1, i64 1
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[C2:%.*]], i64 [[SEL]], i64 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[SEL2]], 0
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[CMP]]) [ "deopt"(i64 [[SEL2]]) ]
+; CHECK-NEXT:    call void @use() [ "deopt"(i64 1) ]
+; CHECK-NEXT:    ret void
+;
+  %sel = select i1 %c, i64 -1, i64 1
+  %sel2 = select i1 %c2, i64 %sel, i64 0
+  %cmp = icmp sgt i64 %sel2, 0
+  call void (i1, ...) @llvm.experimental.guard(i1 %cmp) ["deopt" (i64 %sel2)]
+  call void @use() ["deopt" (i64 %sel2)]
+  ret void
+}
+
+;; The rest of these are slight variations on the patterns
+;; producing 1 of several adjacent constants to test generality
+
+define void @test2(i1 %c, i1 %c2) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i64 0, i64 1
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[C2:%.*]], i64 [[SEL]], i64 -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[SEL2]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[TAKEN:%.*]], label [[UNTAKEN:%.*]]
+; CHECK:       taken:
+; CHECK-NEXT:    call void @use() [ "deopt"(i64 1) ]
+; CHECK-NEXT:    ret void
+; CHECK:       untaken:
+; CHECK-NEXT:    ret void
+;
+  %sel = select i1 %c, i64 0, i64 1
+  %sel2 = select i1 %c2, i64 %sel, i64 -1
+  %cmp = icmp sgt i64 %sel2, 0
+  br i1 %cmp, label %taken, label %untaken
+taken:
+  call void @use() ["deopt" (i64 %sel2)]
+  ret void
+untaken:
+  ret void
+}
+define void @test3(i1 %c, i1 %c2) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i64 0, i64 1
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[C2:%.*]], i64 [[SEL]], i64 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[SEL2]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[TAKEN:%.*]], label [[UNTAKEN:%.*]]
+; CHECK:       taken:
+; CHECK-NEXT:    call void @use() [ "deopt"(i64 2) ]
+; CHECK-NEXT:    ret void
+; CHECK:       untaken:
+; CHECK-NEXT:    ret void
+;
+  %sel = select i1 %c, i64 0, i64 1
+  %sel2 = select i1 %c2, i64 %sel, i64 2
+  %cmp = icmp sgt i64 %sel2, 1
+  br i1 %cmp, label %taken, label %untaken
+taken:
+  call void @use() ["deopt" (i64 %sel2)]
+  ret void
+untaken:
+  ret void
+}
+
+define void @test4(i1 %c, i1 %c2) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C:%.*]], i64 0, i64 1
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[C2:%.*]], i64 0, i64 1
+; CHECK-NEXT:    [[ADD1:%.*]] = add i64 0, [[SEL]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i64 [[ADD1]], [[SEL2]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[ADD2]], 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[TAKEN:%.*]], label [[UNTAKEN:%.*]]
+; CHECK:       taken:
+; CHECK-NEXT:    call void @use() [ "deopt"(i64 2) ]
+; CHECK-NEXT:    ret void
+; CHECK:       untaken:
+; CHECK-NEXT:    ret void
+;
+  %sel = select i1 %c, i64 0, i64 1
+  %sel2 = select i1 %c2, i64 0, i64 1
+  %add1 = add i64 0, %sel
+  %add2 = add i64 %add1, %sel2
+  %cmp = icmp sgt i64 %add2, 1
+  br i1 %cmp, label %taken, label %untaken
+taken:
+  call void @use() ["deopt" (i64 %add2)]
+  ret void
+untaken:
+  ret void
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/guards.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/guards.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/guards.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/guards.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,112 @@
+; RUN: opt -correlated-propagation -S < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1,...)
+
+define i1 @test1(i32 %a) {
+; CHECK-LABEL: @test1(
+; CHECK: %alive = icmp eq i32 %a, 8
+; CHECK-NEXT: %result = or i1 false, %alive
+  %cmp = icmp ult i32 %a, 16
+  call void(i1,...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  %dead = icmp eq i32 %a, 16
+  %alive = icmp eq i32 %a, 8
+  %result = or i1 %dead, %alive
+  ret i1 %result
+}
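The guard acts like an assert on its condition for everything that follows it. A C
analogue of @test1 (illustrative only; the assert stands in for the guard):

#include <assert.h>

int guard_sketch(unsigned a) {
  assert(a < 16u);           /* stands in for llvm.experimental.guard   */
  int dead  = (a == 16u);    /* provably false                          */
  int alive = (a == 8u);     /* still unknown                           */
  return dead | alive;       /* folds to "alive", matching the CHECKs   */
}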
+
+define i1 @test2(i32 %a) {
+; CHECK-LABEL: @test2(
+; CHECK: continue:
+; CHECK-NEXT: %alive = icmp eq i32 %a, 8
+; CHECK-NEXT: %result = or i1 false, %alive
+  %cmp = icmp ult i32 %a, 16
+  call void(i1,...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  br label %continue
+
+continue:
+  %dead = icmp eq i32 %a, 16
+  %alive = icmp eq i32 %a, 8
+  %result = or i1 %dead, %alive
+  ret i1 %result
+}
+
+define i1 @test3(i32 %a, i1 %flag) {
+; CHECK-LABEL: @test3(
+; CHECK: continue:
+; CHECK-NEXT: %alive.1 = icmp eq i32 %a, 16
+; CHECK-NEXT: %alive.2 = icmp eq i32 %a, 8
+; CHECK-NEXT: %result = or i1 %alive.1, %alive.2
+  br i1 %flag, label %true, label %false
+
+true:
+  %cmp = icmp ult i32 %a, 16
+  call void(i1,...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  br label %continue
+
+false:
+  br label %continue
+
+continue:
+  %alive.1 = icmp eq i32 %a, 16
+  %alive.2 = icmp eq i32 %a, 8
+  %result = or i1 %alive.1, %alive.2
+  ret i1 %result
+}
+
+define i1 @test4(i32 %a, i1 %flag) {
+; CHECK-LABEL: @test4(
+; CHECK: continue:
+; CHECK-NEXT: %alive = icmp eq i32 %a, 12
+; CHECK-NEXT: %result = or i1 false, %alive
+  br i1 %flag, label %true, label %false
+
+true:
+  %cmp.t = icmp ult i32 %a, 16
+  call void(i1,...) @llvm.experimental.guard(i1 %cmp.t) [ "deopt"() ]
+  br label %continue
+
+false:
+  %cmp.f = icmp ult i32 %a, 12
+  call void(i1,...) @llvm.experimental.guard(i1 %cmp.f) [ "deopt"() ]
+  br label %continue
+
+continue:
+  %dead = icmp eq i32 %a, 16
+  %alive = icmp eq i32 %a, 12
+  %result = or i1 %dead, %alive
+  ret i1 %result
+}
+
+define i1 @test5(i32 %a) {
+; CHECK-LABEL: @test5(
+; CHECK: continue:
+; CHECK-NEXT: %alive = icmp eq i32 %a.plus.8, 16
+; CHECK-NEXT: %result = or i1 false, %alive
+  %cmp = icmp ult i32 %a, 16
+  call void(i1,...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  %a.plus.8 = add i32 %a, 8
+  br label %continue
+
+continue:
+  %dead = icmp eq i32 %a.plus.8, 24
+  %alive = icmp eq i32 %a.plus.8, 16
+  %result = or i1 %dead, %alive
+  ret i1 %result
+}
+
+; Check that we handle the case when the guard is the very first instruction in
+; a basic block.
+define i1 @test6(i32 %a) {
+; CHECK-LABEL: @test6(
+; CHECK: %alive = icmp eq i32 %a, 8
+; CHECK-NEXT: %result = or i1 false, %alive
+  %cmp = icmp ult i32 %a, 16
+  br label %continue
+
+continue:
+  call void(i1,...) @llvm.experimental.guard(i1 %cmp) [ "deopt"() ]
+  %dead = icmp eq i32 %a, 16
+  %alive = icmp eq i32 %a, 8
+  %result = or i1 %dead, %alive
+  ret i1 %result
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/icmp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/icmp.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/icmp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -correlated-propagation -S %s | FileCheck %s
+; RUN: opt -passes=correlated-propagation -S %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+declare void @check1(i1) #1
+declare void @check2(i1) #1
+
+; Make sure the range of %tmp35 implied by %tmp36 is propagated into the true/false successors
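+; On the true edge %tmp35 is known to be > 0, so %tmp47 (%tmp35 slt 0) folds to
+; false; on the false edge %tmp35 is <= 0, so %tmp48 (%tmp35 sle 0) folds to
+; true, as the CHECK lines in @test1 show.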
+
+define void @test1(i64 %tmp35) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP36:%.*]] = icmp sgt i64 [[TMP35:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP36]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]]
+; CHECK:       bb_true:
+; CHECK-NEXT:    tail call void @check1(i1 false) #0
+; CHECK-NEXT:    unreachable
+; CHECK:       bb_false:
+; CHECK-NEXT:    tail call void @check2(i1 true) #0
+; CHECK-NEXT:    unreachable
+;
+bb:
+  %tmp36 = icmp sgt i64 %tmp35, 0
+  br i1 %tmp36, label %bb_true, label %bb_false
+
+bb_true:
+  %tmp47 = icmp slt i64 %tmp35, 0
+  tail call void @check1(i1 %tmp47) #4
+  unreachable
+
+bb_false:
+  %tmp48 = icmp sle i64 %tmp35, 0
+  tail call void @check2(i1 %tmp48) #4
+  unreachable
+}
+
+; This is the same as test1, but with a diamond on the true path to ensure the
+; fact established by %tmp36 reaches %merge through both inner blocks.
+
+define void @test2(i64 %tmp35, i1 %inner_cmp) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP36:%.*]] = icmp sgt i64 [[TMP35:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP36]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]]
+; CHECK:       bb_true:
+; CHECK-NEXT:    br i1 [[INNER_CMP:%.*]], label [[INNER_TRUE:%.*]], label [[INNER_FALSE:%.*]]
+; CHECK:       inner_true:
+; CHECK-NEXT:    br label [[MERGE:%.*]]
+; CHECK:       inner_false:
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    tail call void @check1(i1 false)
+; CHECK-NEXT:    unreachable
+; CHECK:       bb_false:
+; CHECK-NEXT:    tail call void @check2(i1 true) #0
+; CHECK-NEXT:    unreachable
+;
+bb:
+  %tmp36 = icmp sgt i64 %tmp35, 0
+  br i1 %tmp36, label %bb_true, label %bb_false
+
+bb_true:
+  br i1 %inner_cmp, label %inner_true, label %inner_false
+
+inner_true:
+  br label %merge
+
+inner_false:
+  br label %merge
+
+merge:
+  %tmp47 = icmp slt i64 %tmp35, 0
+  tail call void @check1(i1 %tmp47) #0
+  unreachable
+
+bb_false:
+  %tmp48 = icmp sle i64 %tmp35, 0
+  tail call void @check2(i1 %tmp48) #4
+  unreachable
+}
+
+; Make sure binary operator transfer functions are run when RHS is non-constant
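+; Here %x and %y are each at most 9, so %add = %x + %y is at most 18 and the
+; comparison against 25 folds to true; the phi then collapses and @test3
+; returns true, as the CHECK lines show.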
+
+define i1 @test3(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[X:%.*]], 10
+; CHECK-NEXT:    br i1 [[CMP1]], label [[CONT1:%.*]], label [[OUT:%.*]]
+; CHECK:       cont1:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[Y:%.*]], 10
+; CHECK-NEXT:    br i1 [[CMP2]], label [[CONT2:%.*]], label [[OUT]]
+; CHECK:       cont2:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    br label [[CONT3:%.*]]
+; CHECK:       cont3:
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %cmp1 = icmp ult i32 %x, 10
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 10
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  %add = add i32 %x, %y
+  br label %cont3
+
+cont3:
+  %cmp3 = icmp ult i32 %add, 25
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry], [ true, %cont1 ], [ %cmp3, %cont3 ]
+  ret i1 %ret
+}
+
+; Same as previous but make sure nobody gets over-zealous
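+; With %x and %y each at most 9, %add can be as large as 18, so the comparison
+; against 15 cannot be proven and must be left in place, as the CHECK lines show.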
+
+define i1 @test4(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[X:%.*]], 10
+; CHECK-NEXT:    br i1 [[CMP1]], label [[CONT1:%.*]], label [[OUT:%.*]]
+; CHECK:       cont1:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[Y:%.*]], 10
+; CHECK-NEXT:    br i1 [[CMP2]], label [[CONT2:%.*]], label [[OUT]]
+; CHECK:       cont2:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    br label [[CONT3:%.*]]
+; CHECK:       cont3:
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ult i32 [[ADD]], 15
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    [[RET:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ true, [[CONT1]] ], [ [[CMP3]], [[CONT3]] ]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+entry:
+  %cmp1 = icmp ult i32 %x, 10
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 10
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  %add = add i32 %x, %y
+  br label %cont3
+
+cont3:
+  %cmp3 = icmp ult i32 %add, 15
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry], [ true, %cont1 ], [ %cmp3, %cont3 ]
+  ret i1 %ret
+}
+
+; Make sure binary operator transfer functions are run when RHS is non-constant
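+; Here %x <= 4 and %y <= 4, so %shifted = %x << %y is at most 4 << 4 = 64 and
+; the comparison against 65536 folds to true, as the CHECK lines show.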
+
+define i1 @test5(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[X:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP1]], label [[CONT1:%.*]], label [[OUT:%.*]]
+; CHECK:       cont1:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[Y:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP2]], label [[CONT2:%.*]], label [[OUT]]
+; CHECK:       cont2:
+; CHECK-NEXT:    [[SHIFTED:%.*]] = shl i32 [[X]], [[Y]]
+; CHECK-NEXT:    br label [[CONT3:%.*]]
+; CHECK:       cont3:
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %cmp1 = icmp ult i32 %x, 5
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 5
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  %shifted = shl i32 %x, %y
+  br label %cont3
+
+cont3:
+  %cmp3 = icmp ult i32 %shifted, 65536
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry], [ true, %cont1 ], [ %cmp3, %cont3 ]
+  ret i1 %ret
+}
+
+; Same as previous but make sure nobody gets over-zealous
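+; With %y allowed to be as large as 14, %shifted can reach 4 << 14 = 65536,
+; which is not ult 65536, so the comparison must be kept, as the CHECK lines show.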
+
+define i1 @test6(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[X:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP1]], label [[CONT1:%.*]], label [[OUT:%.*]]
+; CHECK:       cont1:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[Y:%.*]], 15
+; CHECK-NEXT:    br i1 [[CMP2]], label [[CONT2:%.*]], label [[OUT]]
+; CHECK:       cont2:
+; CHECK-NEXT:    [[SHIFTED:%.*]] = shl i32 [[X]], [[Y]]
+; CHECK-NEXT:    br label [[CONT3:%.*]]
+; CHECK:       cont3:
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ult i32 [[SHIFTED]], 65536
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    [[RET:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ true, [[CONT1]] ], [ [[CMP3]], [[CONT3]] ]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+entry:
+  %cmp1 = icmp ult i32 %x, 5
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 15
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  %shifted = shl i32 %x, %y
+  br label %cont3
+
+cont3:
+  %cmp3 = icmp ult i32 %shifted, 65536
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry], [ true, %cont1 ], [ %cmp3, %cont3 ]
+  ret i1 %ret
+}
+
+attributes #4 = { noreturn }

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/non-null.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/non-null.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/non-null.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/non-null.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+define void @test1(i8* %ptr) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[PTR:%.*]]
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+;
+  %A = load i8, i8* %ptr
+  br label %bb
+bb:
+  icmp ne i8* %ptr, null
+  ret void
+}
+
+define void @test1_no_null_opt(i8* %ptr) #0 {
+; CHECK-LABEL: @test1_no_null_opt(
+; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[PTR:%.*]]
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8* [[PTR]], null
+; CHECK-NEXT:    ret void
+;
+  %A = load i8, i8* %ptr
+  br label %bb
+bb:
+  icmp ne i8* %ptr, null
+  ret void
+}
+
+define void @test2(i8* %ptr) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    store i8 0, i8* [[PTR:%.*]]
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+;
+  store i8 0, i8* %ptr
+  br label %bb
+bb:
+  icmp ne i8* %ptr, null
+  ret void
+}
+
+define void @test2_no_null_opt(i8* %ptr) #0 {
+; CHECK-LABEL: @test2_no_null_opt(
+; CHECK-NEXT:    store i8 0, i8* [[PTR:%.*]]
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8* [[PTR]], null
+; CHECK-NEXT:    ret void
+;
+  store i8 0, i8* %ptr
+  br label %bb
+bb:
+  icmp ne i8* %ptr, null
+  ret void
+}
+
+define void @test3() {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+;
+  %ptr = alloca i8
+  br label %bb
+bb:
+  icmp ne i8* %ptr, null
+  ret void
+}
+
+;; It is OK to remove the icmp here since %ptr comes from an alloca and is
+;; therefore known to be non-null, even under "null-pointer-is-valid".
+
+define void @test3_no_null_opt() #0 {
+; CHECK-LABEL: @test3_no_null_opt(
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+;
+  %ptr = alloca i8
+  br label %bb
+bb:
+  icmp ne i8* %ptr, null
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+
+define void @test4(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DEST:%.*]], i8* [[SRC:%.*]], i32 1, i1 false)
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
+  br label %bb
+bb:
+  icmp ne i8* %dest, null
+  icmp ne i8* %src, null
+  ret void
+}
+
+define void @test4_no_null_opt(i8* %dest, i8* %src) #0 {
+; CHECK-LABEL: @test4_no_null_opt(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DEST:%.*]], i8* [[SRC:%.*]], i32 1, i1 false)
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8* [[DEST]], null
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8* [[SRC]], null
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
+  br label %bb
+bb:
+  icmp ne i8* %dest, null
+  icmp ne i8* %src, null
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+define void @test5(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i32(i8* [[DEST:%.*]], i8* [[SRC:%.*]], i32 1, i1 false)
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
+  br label %bb
+bb:
+  icmp ne i8* %dest, null
+  icmp ne i8* %src, null
+  ret void
+}
+
+define void @test5_no_null_opt(i8* %dest, i8* %src) #0 {
+; CHECK-LABEL: @test5_no_null_opt(
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i32(i8* [[DEST:%.*]], i8* [[SRC:%.*]], i32 1, i1 false)
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8* [[DEST]], null
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8* [[SRC]], null
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 false)
+  br label %bb
+bb:
+  icmp ne i8* %dest, null
+  icmp ne i8* %src, null
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
+define void @test6(i8* %dest) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i32(i8* [[DEST:%.*]], i8 -1, i32 1, i1 false)
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 255, i32 1, i1 false)
+  br label %bb
+bb:
+  icmp ne i8* %dest, null
+  ret void
+}
+
+define void @test6_no_null_opt(i8* %dest) #0 {
+; CHECK-LABEL: @test6_no_null_opt(
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i32(i8* [[DEST:%.*]], i8 -1, i32 1, i1 false)
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8* [[DEST]], null
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 255, i32 1, i1 false)
+  br label %bb
+bb:
+  icmp ne i8* %dest, null
+  ret void
+}
+
+define void @test7(i8* %dest, i8* %src, i32 %len) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DEST:%.*]], i8* [[SRC:%.*]], i32 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[KEEP1:%.*]] = icmp ne i8* [[DEST]], null
+; CHECK-NEXT:    [[KEEP2:%.*]] = icmp ne i8* [[SRC]], null
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 false)
+  br label %bb
+bb:
+  %KEEP1 = icmp ne i8* %dest, null
+  %KEEP2 = icmp ne i8* %src, null
+  ret void
+}
+
+declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) *, i8 addrspace(1) *, i32, i1)
+define void @test8(i8 addrspace(1) * %dest, i8 addrspace(1) * %src) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    call void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* [[DEST:%.*]], i8 addrspace(1)* [[SRC:%.*]], i32 1, i1 false)
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[KEEP1:%.*]] = icmp ne i8 addrspace(1)* [[DEST]], null
+; CHECK-NEXT:    [[KEEP2:%.*]] = icmp ne i8 addrspace(1)* [[SRC]], null
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1) * %dest, i8 addrspace(1) * %src, i32 1, i1 false)
+  br label %bb
+bb:
+  %KEEP1 = icmp ne i8 addrspace(1) * %dest, null
+  %KEEP2 = icmp ne i8 addrspace(1) * %src, null
+  ret void
+}
+
+define void @test9(i8* %dest, i8* %src) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DEST:%.*]], i8* [[SRC:%.*]], i32 1, i1 true)
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[KEEP1:%.*]] = icmp ne i8* [[DEST]], null
+; CHECK-NEXT:    [[KEEP2:%.*]] = icmp ne i8* [[SRC]], null
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 true)
+  br label %bb
+bb:
+  %KEEP1 = icmp ne i8* %dest, null
+  %KEEP2 = icmp ne i8* %src, null
+  ret void
+}
+
+declare void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+
+define void @test10(i8* %arg1, i8* %arg2, i32 %non-pointer-arg) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IS_NULL:%.*]] = icmp eq i8* [[ARG1:%.*]], null
+; CHECK-NEXT:    br i1 [[IS_NULL]], label [[NULL:%.*]], label [[NON_NULL:%.*]]
+; CHECK:       non_null:
+; CHECK-NEXT:    call void @test10_helper(i8* nonnull [[ARG1]], i8* [[ARG2:%.*]], i32 [[NON_POINTER_ARG:%.*]])
+; CHECK-NEXT:    br label [[NULL]]
+; CHECK:       null:
+; CHECK-NEXT:    call void @test10_helper(i8* [[ARG1]], i8* [[ARG2]], i32 [[NON_POINTER_ARG]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %is_null = icmp eq i8* %arg1, null
+  br i1 %is_null, label %null, label %non_null
+
+non_null:
+  call void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+  br label %null
+
+null:
+  call void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+  ret void
+}
+
+declare void @test11_helper(i8* %arg)
+
+define void @test11(i8* %arg1, i8** %arg2) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IS_NULL:%.*]] = icmp eq i8* [[ARG1:%.*]], null
+; CHECK-NEXT:    br i1 [[IS_NULL]], label [[NULL:%.*]], label [[NON_NULL:%.*]]
+; CHECK:       non_null:
+; CHECK-NEXT:    br label [[MERGE:%.*]]
+; CHECK:       null:
+; CHECK-NEXT:    [[ANOTHER_ARG:%.*]] = alloca i8
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[MERGED_ARG:%.*]] = phi i8* [ [[ANOTHER_ARG]], [[NULL]] ], [ [[ARG1]], [[NON_NULL]] ]
+; CHECK-NEXT:    call void @test11_helper(i8* nonnull [[MERGED_ARG]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %is_null = icmp eq i8* %arg1, null
+  br i1 %is_null, label %null, label %non_null
+
+non_null:
+  br label %merge
+
+null:
+  %another_arg = alloca i8
+  br label %merge
+
+merge:
+  %merged_arg = phi i8* [%another_arg, %null], [%arg1, %non_null]
+  call void @test11_helper(i8* %merged_arg)
+  ret void
+}
+
+declare void @test12_helper(i8* %arg)
+
+define void @test12(i8* %arg1, i8** %arg2) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IS_NULL:%.*]] = icmp eq i8* [[ARG1:%.*]], null
+; CHECK-NEXT:    br i1 [[IS_NULL]], label [[NULL:%.*]], label [[NON_NULL:%.*]]
+; CHECK:       non_null:
+; CHECK-NEXT:    br label [[MERGE:%.*]]
+; CHECK:       null:
+; CHECK-NEXT:    [[ANOTHER_ARG:%.*]] = load i8*, i8** [[ARG2:%.*]], !nonnull !0
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[MERGED_ARG:%.*]] = phi i8* [ [[ANOTHER_ARG]], [[NULL]] ], [ [[ARG1]], [[NON_NULL]] ]
+; CHECK-NEXT:    call void @test12_helper(i8* nonnull [[MERGED_ARG]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %is_null = icmp eq i8* %arg1, null
+  br i1 %is_null, label %null, label %non_null
+
+non_null:
+  br label %merge
+
+null:
+  %another_arg = load i8*, i8** %arg2, !nonnull !{}
+  br label %merge
+
+merge:
+  %merged_arg = phi i8* [%another_arg, %null], [%arg1, %non_null]
+  call void @test12_helper(i8* %merged_arg)
+  ret void
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,726 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -correlated-propagation < %s | FileCheck %s
+
+declare void @llvm.trap()
+declare {i8, i1} @llvm.uadd.with.overflow(i8, i8)
+declare {i8, i1} @llvm.sadd.with.overflow(i8, i8)
+declare {i8, i1} @llvm.usub.with.overflow(i8, i8)
+declare {i8, i1} @llvm.ssub.with.overflow(i8, i8)
+declare {i8, i1} @llvm.umul.with.overflow(i8, i8)
+declare {i8, i1} @llvm.smul.with.overflow(i8, i8)
+
+define i1 @uadd_ov_false(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @uadd_ov_false(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp ugt i8 [[X]], -102
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp ugt i8 [[X]], -101
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 100)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %c1 = icmp ugt i8 %x, 154
+  store i1 %c1, i1* %pc
+  %c2 = icmp ugt i8 %x, 155
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @uadd_ov_true(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @uadd_ov_true(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[OVERFLOW:%.*]], label [[TRAP:%.*]]
+; CHECK:       overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp ugt i8 [[X]], -100
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp ugt i8 [[X]], -101
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 100)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %overflow, label %trap
+
+overflow:
+  %c1 = icmp ugt i8 %x, 156
+  store i1 %c1, i1* %pc
+  %c2 = icmp ugt i8 %x, 155
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @sadd_ov_false(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @sadd_ov_false(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp sgt i8 [[X]], 26
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp sgt i8 [[X]], 27
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.sadd.with.overflow(i8 %x, i8 100)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %c1 = icmp sgt i8 %x, 26
+  store i1 %c1, i1* %pc
+  %c2 = icmp sgt i8 %x, 27
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @sadd_ov_true(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @sadd_ov_true(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[OVERFLOW:%.*]], label [[TRAP:%.*]]
+; CHECK:       overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp sgt i8 [[X]], 28
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp sgt i8 [[X]], 27
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.sadd.with.overflow(i8 %x, i8 100)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %overflow, label %trap
+
+overflow:
+  %c1 = icmp sgt i8 %x, 28
+  store i1 %c1, i1* %pc
+  %c2 = icmp sgt i8 %x, 27
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @usub_ov_false(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @usub_ov_false(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp ult i8 [[X]], 101
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp ult i8 [[X]], 100
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.usub.with.overflow(i8 %x, i8 100)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %c1 = icmp ult i8 %x, 101
+  store i1 %c1, i1* %pc
+  %c2 = icmp ult i8 %x, 100
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @usub_ov_true(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @usub_ov_true(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[OVERFLOW:%.*]], label [[TRAP:%.*]]
+; CHECK:       overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp ult i8 [[X]], 99
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp ult i8 [[X]], 100
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.usub.with.overflow(i8 %x, i8 100)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %overflow, label %trap
+
+overflow:
+  %c1 = icmp ult i8 %x, 99
+  store i1 %c1, i1* %pc
+  %c2 = icmp ult i8 %x, 100
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @ssub_ov_false(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @ssub_ov_false(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp slt i8 [[X]], -27
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp slt i8 [[X]], -28
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.ssub.with.overflow(i8 %x, i8 100)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %c1 = icmp slt i8 %x, -27
+  store i1 %c1, i1* %pc
+  %c2 = icmp slt i8 %x, -28
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @ssub_ov_true(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @ssub_ov_true(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[OVERFLOW:%.*]], label [[TRAP:%.*]]
+; CHECK:       overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp slt i8 [[X]], -29
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp slt i8 [[X]], -28
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.ssub.with.overflow(i8 %x, i8 100)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %overflow, label %trap
+
+overflow:
+  %c1 = icmp slt i8 %x, -29
+  store i1 %c1, i1* %pc
+  %c2 = icmp slt i8 %x, -28
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @umul_ov_false(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @umul_ov_false(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp ugt i8 [[X]], 24
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp ugt i8 [[X]], 25
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.umul.with.overflow(i8 %x, i8 10)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %c1 = icmp ugt i8 %x, 24
+  store i1 %c1, i1* %pc
+  %c2 = icmp ugt i8 %x, 25
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @umul_ov_true(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @umul_ov_true(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[OVERFLOW:%.*]], label [[TRAP:%.*]]
+; CHECK:       overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp ugt i8 [[X]], 26
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp ugt i8 [[X]], 25
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.umul.with.overflow(i8 %x, i8 10)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %overflow, label %trap
+
+overflow:
+  %c1 = icmp ugt i8 %x, 26
+  store i1 %c1, i1* %pc
+  %c2 = icmp ugt i8 %x, 25
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+; Signed mul is constrained from both sides.
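+; For i8, 10 * %x stays within [-128, 127] only when %x is in [-12, 12], so
+; the bounds -11/-12 below (and 11/12 in the next test) probe both edges of
+; that range.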
+define i1 @smul_ov_false_bound1(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @smul_ov_false_bound1(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp slt i8 [[X]], -11
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp slt i8 [[X]], -12
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.smul.with.overflow(i8 %x, i8 10)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %c1 = icmp slt i8 %x, -11
+  store i1 %c1, i1* %pc
+  %c2 = icmp slt i8 %x, -12
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @smul_ov_false_bound2(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @smul_ov_false_bound2(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp sgt i8 [[X]], 11
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp sgt i8 [[X]], 12
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.smul.with.overflow(i8 %x, i8 10)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %c1 = icmp sgt i8 %x, 11
+  store i1 %c1, i1* %pc
+  %c2 = icmp sgt i8 %x, 12
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+; Can't use slt/sgt to test for a hole in the range, check equality instead.
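+; When the multiplication does overflow, %x lies outside [-12, 12], a set with
+; a hole in the middle that a single signed comparison cannot express, so these
+; tests compare against -13/-12 and 13/12 with icmp eq.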
+define i1 @smul_ov_true_bound1(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @smul_ov_true_bound1(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[OVERFLOW:%.*]], label [[TRAP:%.*]]
+; CHECK:       overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp eq i8 [[X]], -13
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp eq i8 [[X]], -12
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.smul.with.overflow(i8 %x, i8 10)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %overflow, label %trap
+
+overflow:
+  %c1 = icmp eq i8 %x, -13
+  store i1 %c1, i1* %pc
+  %c2 = icmp eq i8 %x, -12
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @smul_ov_true_bound2(i8 %x, i8* %px, i1* %pc) {
+; CHECK-LABEL: @smul_ov_true_bound2(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    store i8 [[VAL]], i8* [[PX:%.*]]
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[OVERFLOW:%.*]], label [[TRAP:%.*]]
+; CHECK:       overflow:
+; CHECK-NEXT:    [[C1:%.*]] = icmp eq i8 [[X]], 13
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp eq i8 [[X]], 12
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.smul.with.overflow(i8 %x, i8 10)
+  %val = extractvalue {i8, i1} %val_ov, 0
+  store i8 %val, i8* %px
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %overflow, label %trap
+
+overflow:
+  %c1 = icmp eq i8 %x, 13
+  store i1 %c1, i1* %pc
+  %c2 = icmp eq i8 %x, 12
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @uadd_val(i8 %x, i1* %pc) {
+; CHECK-LABEL: @uadd_val(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    br label [[SPLIT:%.*]]
+; CHECK:       split:
+; CHECK-NEXT:    [[C1:%.*]] = icmp ugt i8 [[VAL]], 100
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp uge i8 [[VAL]], 100
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.uadd.with.overflow(i8 %x, i8 100)
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %val = extractvalue {i8, i1} %val_ov, 0
+  br label %split
+
+split:
+  %c1 = icmp ugt i8 %val, 100
+  store i1 %c1, i1* %pc
+  %c2 = icmp uge i8 %val, 100
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @sadd_val(i8 %x, i1* %pc) {
+; CHECK-LABEL: @sadd_val(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    br label [[SPLIT:%.*]]
+; CHECK:       split:
+; CHECK-NEXT:    [[C1:%.*]] = icmp sgt i8 [[VAL]], -28
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp sge i8 [[VAL]], -28
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.sadd.with.overflow(i8 %x, i8 100)
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %val = extractvalue {i8, i1} %val_ov, 0
+  br label %split
+
+split:
+  %c1 = icmp sgt i8 %val, -28
+  store i1 %c1, i1* %pc
+  %c2 = icmp sge i8 %val, -28
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @usub_val(i8 %x, i1* %pc) {
+; CHECK-LABEL: @usub_val(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    br label [[SPLIT:%.*]]
+; CHECK:       split:
+; CHECK-NEXT:    [[C1:%.*]] = icmp ult i8 [[VAL]], -101
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp ule i8 [[VAL]], -101
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.usub.with.overflow(i8 %x, i8 100)
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %val = extractvalue {i8, i1} %val_ov, 0
+  br label %split
+
+split:
+  %c1 = icmp ult i8 %val, 155
+  store i1 %c1, i1* %pc
+  %c2 = icmp ule i8 %val, 155
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @ssub_val(i8 %x, i1* %pc) {
+; CHECK-LABEL: @ssub_val(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 100)
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    br label [[SPLIT:%.*]]
+; CHECK:       split:
+; CHECK-NEXT:    [[C1:%.*]] = icmp slt i8 [[VAL]], 27
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp sle i8 [[VAL]], 27
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.ssub.with.overflow(i8 %x, i8 100)
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %val = extractvalue {i8, i1} %val_ov, 0
+  br label %split
+
+split:
+  %c1 = icmp slt i8 %val, 27
+  store i1 %c1, i1* %pc
+  %c2 = icmp sle i8 %val, 27
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @umul_val(i8 %x, i1* %pc) {
+; CHECK-LABEL: @umul_val(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    br label [[SPLIT:%.*]]
+; CHECK:       split:
+; CHECK-NEXT:    [[C1:%.*]] = icmp ult i8 [[VAL]], -6
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp ule i8 [[VAL]], -6
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.umul.with.overflow(i8 %x, i8 10)
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %val = extractvalue {i8, i1} %val_ov, 0
+  br label %split
+
+split:
+  %c1 = icmp ult i8 %val, 250
+  store i1 %c1, i1* %pc
+  %c2 = icmp ule i8 %val, 250
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @smul_val_bound1(i8 %x, i1* %pc) {
+; CHECK-LABEL: @smul_val_bound1(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    br label [[SPLIT:%.*]]
+; CHECK:       split:
+; CHECK-NEXT:    [[C1:%.*]] = icmp slt i8 [[VAL]], 120
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp sle i8 [[VAL]], 120
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.smul.with.overflow(i8 %x, i8 10)
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %val = extractvalue {i8, i1} %val_ov, 0
+  br label %split
+
+split:
+  %c1 = icmp slt i8 %val, 120
+  store i1 %c1, i1* %pc
+  %c2 = icmp sle i8 %val, 120
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}
+
+define i1 @smul_val_bound2(i8 %x, i1* %pc) {
+; CHECK-LABEL: @smul_val_bound2(
+; CHECK-NEXT:    [[VAL_OV:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 10)
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[NO_OVERFLOW:%.*]]
+; CHECK:       no_overflow:
+; CHECK-NEXT:    [[VAL:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 0
+; CHECK-NEXT:    br label [[SPLIT:%.*]]
+; CHECK:       split:
+; CHECK-NEXT:    [[C1:%.*]] = icmp sgt i8 [[VAL]], -120
+; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
+; CHECK-NEXT:    [[C2:%.*]] = icmp sge i8 [[VAL]], -120
+; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+;
+  %val_ov = call {i8, i1} @llvm.smul.with.overflow(i8 %x, i8 10)
+  %ov = extractvalue {i8, i1} %val_ov, 1
+  br i1 %ov, label %trap, label %no_overflow
+
+no_overflow:
+  %val = extractvalue {i8, i1} %val_ov, 0
+  br label %split
+
+split:
+  %c1 = icmp sgt i8 %val, -120
+  store i1 %c1, i1* %pc
+  %c2 = icmp sge i8 %val, -120
+  ret i1 %c2
+
+trap:
+  call void @llvm.trap()
+  unreachable
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/overflows.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/overflows.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/overflows.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/overflows.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,717 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -correlated-propagation < %s | FileCheck %s
+
+; Check that debug locations are preserved. For more info see:
+;   https://llvm.org/docs/SourceLevelDebugging.html#fixing-errors
+; RUN: opt < %s -enable-debugify -correlated-propagation -S 2>&1 | \
+; RUN:   FileCheck %s -check-prefix=DEBUG
+; DEBUG: CheckModuleDebugify: PASS
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
+
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)
+
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
+
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
+
+declare void @llvm.trap()
+
+
+define i32 @signed_add(i32 %x, i32 %y) {
+; CHECK-LABEL: @signed_add(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[Y:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK:       land.lhs.true:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 2147483647, [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TRAP:%.*]], label [[CONT:%.*]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cont:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP4]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       lor.lhs.false:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[Y]], 0
+; CHECK-NEXT:    br i1 [[CMP2]], label [[LAND_LHS_TRUE3:%.*]], label [[COND_FALSE]]
+; CHECK:       land.lhs.true3:
+; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw i32 -2147483648, [[Y]]
+; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP5]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { i32, i1 } [[TMP6]], i1 false, 1
+; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TRAP]], label [[CONT4:%.*]]
+; CHECK:       cont4:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP7]], 0
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[X]]
+; CHECK-NEXT:    br i1 [[CMP5]], label [[COND_END]], label [[COND_FALSE]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP10:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { i32, i1 } [[TMP10]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { i32, i1 } [[TMP10]], 1
+; CHECK-NEXT:    br i1 [[TMP12]], label [[TRAP]], label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[CONT4]] ], [ 0, [[CONT]] ], [ [[TMP11]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp sgt i32 %y, 0
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 2147483647, i32 %y)
+  %1 = extractvalue { i32, i1 } %0, 1
+  br i1 %1, label %trap, label %cont
+
+trap:                                             ; preds = %land.lhs.true, %land.lhs.true3, %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cont:                                             ; preds = %land.lhs.true
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp1 = icmp slt i32 %2, %x
+  br i1 %cmp1, label %cond.end, label %cond.false
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp slt i32 %y, 0
+  br i1 %cmp2, label %land.lhs.true3, label %cond.false
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 -2147483648, i32 %y)
+  %4 = extractvalue { i32, i1 } %3, 1
+  br i1 %4, label %trap, label %cont4
+
+cont4:                                            ; preds = %land.lhs.true3
+  %5 = extractvalue { i32, i1 } %3, 0
+  %cmp5 = icmp sgt i32 %5, %x
+  br i1 %cmp5, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %cont, %cont4, %lor.lhs.false
+  %6 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %7 = extractvalue { i32, i1 } %6, 0
+  %8 = extractvalue { i32, i1 } %6, 1
+  br i1 %8, label %trap, label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cont, %cont4
+  %cond = phi i32 [ 0, %cont4 ], [ 0, %cont ], [ %7, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @unsigned_add(i32 %x, i32 %y) {
+; CHECK-LABEL: @unsigned_add(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nuw i32 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TRAP:%.*]], label [[CONT:%.*]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cont:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[TMP4]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TRAP]], label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[CONT]] ], [ [[TMP6]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 -1, i32 %y)
+  %1 = extractvalue { i32, i1 } %0, 1
+  br i1 %1, label %trap, label %cont
+
+trap:                                             ; preds = %cond.false, %entry
+  tail call void @llvm.trap()
+  unreachable
+
+cont:                                             ; preds = %entry
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp1 = icmp ult i32 %2, %x
+  br i1 %cmp1, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %cont
+  %3 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+  %4 = extractvalue { i32, i1 } %3, 0
+  %5 = extractvalue { i32, i1 } %3, 1
+  br i1 %5, label %trap, label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cont
+  %cond = phi i32 [ 0, %cont ], [ %4, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @signed_sub(i32 %x, i32 %y) {
+; CHECK-LABEL: @signed_sub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[Y:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK:       land.lhs.true:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[Y]], 2147483647
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TRAP:%.*]], label [[CONT:%.*]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cont:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP4]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       lor.lhs.false:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[Y]], 0
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_FALSE]], label [[LAND_LHS_TRUE3:%.*]]
+; CHECK:       land.lhs.true3:
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i32 [[Y]], -2147483648
+; CHECK-NEXT:    [[TMP6:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP5]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertvalue { i32, i1 } [[TMP6]], i1 false, 1
+; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
+; CHECK-NEXT:    br i1 [[TMP8]], label [[TRAP]], label [[CONT4:%.*]]
+; CHECK:       cont4:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP7]], 0
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[X]]
+; CHECK-NEXT:    br i1 [[CMP5]], label [[COND_END]], label [[COND_FALSE]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP10:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { i32, i1 } [[TMP10]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { i32, i1 } [[TMP10]], 1
+; CHECK-NEXT:    br i1 [[TMP12]], label [[TRAP]], label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[CONT4]] ], [ 0, [[CONT]] ], [ [[TMP11]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp slt i32 %y, 0
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %y, i32 2147483647)
+  %1 = extractvalue { i32, i1 } %0, 1
+  br i1 %1, label %trap, label %cont
+
+trap:                                             ; preds = %land.lhs.true, %land.lhs.true3, %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cont:                                             ; preds = %land.lhs.true
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp1 = icmp slt i32 %2, %x
+  br i1 %cmp1, label %cond.end, label %cond.false
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp eq i32 %y, 0
+  br i1 %cmp2, label %cond.false, label %land.lhs.true3
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %y, i32 -2147483648)
+  %4 = extractvalue { i32, i1 } %3, 1
+  br i1 %4, label %trap, label %cont4
+
+cont4:                                            ; preds = %land.lhs.true3
+  %5 = extractvalue { i32, i1 } %3, 0
+  %cmp5 = icmp sgt i32 %5, %x
+  br i1 %cmp5, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %lor.lhs.false, %cont, %cont4
+  %6 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
+  %7 = extractvalue { i32, i1 } %6, 0
+  %8 = extractvalue { i32, i1 } %6, 1
+  br i1 %8, label %trap, label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cont, %cont4
+  %cond = phi i32 [ 0, %cont4 ], [ 0, %cont ], [ %7, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @unsigned_sub(i32 %x, i32 %y) {
+; CHECK-LABEL: @unsigned_sub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TRAP:%.*]], label [[COND_END]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp ult i32 %x, %y
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
+  %1 = extractvalue { i32, i1 } %0, 0
+  %2 = extractvalue { i32, i1 } %0, 1
+  br i1 %2, label %trap, label %cond.end
+
+trap:                                             ; preds = %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cond.end:                                         ; preds = %cond.false, %entry
+  %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @signed_add_r1(i32 %x) {
+; CHECK-LABEL: @signed_add_r1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 2147483647
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[X]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TRAP:%.*]], label [[COND_END]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %x, 2147483647
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 0
+  %2 = extractvalue { i32, i1 } %0, 1
+  br i1 %2, label %trap, label %cond.end
+
+trap:                                             ; preds = %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cond.end:                                         ; preds = %cond.false, %entry
+  %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @unsigned_add_r1(i32 %x) {
+; CHECK-LABEL: @unsigned_add_r1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], -1
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[X]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TRAP:%.*]], label [[COND_END]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %x, -1
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 0
+  %2 = extractvalue { i32, i1 } %0, 1
+  br i1 %2, label %trap, label %cond.end
+
+trap:                                             ; preds = %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cond.end:                                         ; preds = %cond.false, %entry
+  %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @signed_sub_r1(i32 %x) {
+; CHECK-LABEL: @signed_sub_r1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], -2147483648
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 [[X]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TRAP:%.*]], label [[COND_END]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %x, -2147483648
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 0
+  %2 = extractvalue { i32, i1 } %0, 1
+  br i1 %2, label %trap, label %cond.end
+
+trap:                                             ; preds = %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cond.end:                                         ; preds = %cond.false, %entry
+  %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @unsigned_sub_r1(i32 %x) {
+; CHECK-LABEL: @unsigned_sub_r1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nuw i32 [[X]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TRAP:%.*]], label [[COND_END]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 0
+  %2 = extractvalue { i32, i1 } %0, 1
+  br i1 %2, label %trap, label %cond.end
+
+trap:                                             ; preds = %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cond.end:                                         ; preds = %cond.false, %entry
+  %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @signed_add_rn1(i32 %x) {
+; CHECK-LABEL: @signed_add_rn1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], -2147483648
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[X]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TRAP:%.*]], label [[COND_END]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %x, -2147483648
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 -1)
+  %1 = extractvalue { i32, i1 } %0, 0
+  %2 = extractvalue { i32, i1 } %0, 1
+  br i1 %2, label %trap, label %cond.end
+
+trap:                                             ; preds = %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cond.end:                                         ; preds = %cond.false, %entry
+  %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @signed_sub_rn1(i32 %x) {
+; CHECK-LABEL: @signed_sub_rn1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 2147483647
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 [[X]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TRAP:%.*]], label [[COND_END]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %x, 2147483647
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 -1)
+  %1 = extractvalue { i32, i1 } %0, 0
+  %2 = extractvalue { i32, i1 } %0, 1
+  br i1 %2, label %trap, label %cond.end
+
+trap:                                             ; preds = %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cond.end:                                         ; preds = %cond.false, %entry
+  %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @unsigned_mul(i32 %x) {
+; CHECK-LABEL: @unsigned_mul(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 10000
+; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[MULO1:%.*]] = mul nuw i32 [[X]], 100
+; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { i32, i1 } undef, i32 [[MULO1]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } [[TMP0]], i1 false, 1
+; CHECK-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[COND_END]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RES]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp ugt i32 %x, 10000
+  br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %mulo = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 100)
+  %res = extractvalue { i32, i1 } %mulo, 0
+  %ov = extractvalue { i32, i1 } %mulo, 1
+  br i1 %ov, label %trap, label %cond.end
+
+trap:                                             ; preds = %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cond.end:                                         ; preds = %cond.false, %entry
+  %cond = phi i32 [ 0, %entry ], [ %res, %cond.false ]
+  ret i32 %cond
+}
+
+define i32 @signed_mul(i32 %x) {
+; CHECK-LABEL: @signed_mul(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[X:%.*]], 10000
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[X]], -10000
+; CHECK-NEXT:    [[CMP3:%.*]] = or i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[MULO1:%.*]] = mul nsw i32 [[X]], 100
+; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { i32, i1 } undef, i32 [[MULO1]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } [[TMP0]], i1 false, 1
+; CHECK-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[TRAP:%.*]], label [[COND_END]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RES]], [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %cmp1 = icmp sgt i32 %x, 10000
+  %cmp2 = icmp slt i32 %x, -10000
+  %cmp3 = or i1 %cmp1, %cmp2
+  br i1 %cmp3, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %entry
+  %mulo = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %x, i32 100)
+  %res = extractvalue { i32, i1 } %mulo, 0
+  %ov = extractvalue { i32, i1 } %mulo, 1
+  br i1 %ov, label %trap, label %cond.end
+
+trap:                                             ; preds = %cond.false
+  tail call void @llvm.trap()
+  unreachable
+
+cond.end:                                         ; preds = %cond.false, %entry
+  %cond = phi i32 [ 0, %entry ], [ %res, %cond.false ]
+  ret i32 %cond
+}
+
+declare i32 @bar(i32)
+
+define void @unsigned_loop(i32 %i) {
+; CHECK-LABEL: @unsigned_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[I:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP3]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK:       while.body.preheader:
+; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[I_ADDR_04:%.*]] = phi i32 [ [[TMP4:%.*]], [[CONT:%.*]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @bar(i32 [[I_ADDR_04]])
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nuw i32 [[I_ADDR_04]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TRAP:%.*]], label [[CONT]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cont:
+; CHECK-NEXT:    [[TMP4]] = extractvalue { i32, i1 } [[TMP2]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp3 = icmp eq i32 %i, 0
+  br i1 %cmp3, label %while.end, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %cont
+  %i.addr.04 = phi i32 [ %2, %cont ], [ %i, %while.body.preheader ]
+  %call = tail call i32 @bar(i32 %i.addr.04)
+  %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.addr.04, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+  br i1 %1, label %trap, label %cont
+
+trap:                                             ; preds = %while.body
+  tail call void @llvm.trap()
+  unreachable
+
+cont:                                             ; preds = %while.body
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp = icmp eq i32 %2, 0
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %cont, %entry
+  ret void
+}
+
+define void @intrinsic_into_phi(i32 %n) {
+; CHECK-LABEL: @intrinsic_into_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[CONT:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[DOTLCSSA:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } undef, i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 false, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TRAP:%.*]], label [[CONT]]
+; CHECK:       trap:
+; CHECK-NEXT:    tail call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cont:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi { i32, i1 } [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP2]], [[FOR_COND:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+; CHECK-NEXT:    [[CALL9:%.*]] = tail call i32 @bar(i32 [[TMP5]])
+; CHECK-NEXT:    [[TOBOOL10:%.*]] = icmp eq i32 [[CALL9]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL10]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
+; CHECK:       while.body.preheader:
+; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP8:%.*]], 0
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @bar(i32 [[TMP6]])
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP6]], [[WHILE_COND:%.*]] ], [ [[TMP5]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP8]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP7]], i32 1)
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
+; CHECK-NEXT:    br i1 [[TMP9]], label [[TRAP]], label [[WHILE_COND]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[DOTLCSSA]] = phi i32 [ [[TMP5]], [[CONT]] ], [ [[TMP6]], [[WHILE_COND]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[DOTLCSSA]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[CLEANUP2:%.*]]
+; CHECK:       cleanup2:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %cont
+
+for.cond:                                         ; preds = %while.end
+  %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %.lcssa, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+  br i1 %1, label %trap, label %cont
+
+trap:                                             ; preds = %for.cond, %while.body
+  tail call void @llvm.trap()
+  unreachable
+
+cont:                                             ; preds = %entry, %for.cond
+  %2 = phi { i32, i1 } [ zeroinitializer, %entry ], [ %0, %for.cond ]
+  %3 = extractvalue { i32, i1 } %2, 0
+  %call9 = tail call i32 @bar(i32 %3)
+  %tobool10 = icmp eq i32 %call9, 0
+  br i1 %tobool10, label %while.end, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %cont
+  br label %while.body
+
+while.cond:                                       ; preds = %while.body
+  %4 = extractvalue { i32, i1 } %6, 0
+  %call = tail call i32 @bar(i32 %4)
+  %tobool = icmp eq i32 %call, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.cond
+  %5 = phi i32 [ %4, %while.cond ], [ %3, %while.body.preheader ]
+  %6 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %5, i32 1)
+  %7 = extractvalue { i32, i1 } %6, 1
+  br i1 %7, label %trap, label %while.cond
+
+while.end:                                        ; preds = %while.cond, %cont
+  %.lcssa = phi i32 [ %3, %cont ], [ %4, %while.cond ]
+  %cmp = icmp slt i32 %.lcssa, %n
+  br i1 %cmp, label %for.cond, label %cleanup2
+
+cleanup2:                                         ; preds = %while.end
+  ret void
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/phi-common-val.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/phi-common-val.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/phi-common-val.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/phi-common-val.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+; RUN: opt < %s -passes="correlated-propagation" -S | FileCheck %s
+
+define i8* @simplify_phi_common_value_op0(i8* %ptr, i32* %b) {
+; CHECK-LABEL: @simplify_phi_common_value_op0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ISNULL:%.*]] = icmp eq i8* [[PTR:%.*]], null
+; CHECK-NEXT:    br i1 [[ISNULL]], label [[RETURN:%.*]], label [[ELSE:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LB:%.*]] = load i32, i32* [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[LB]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i8* [[PTR]]
+;
+entry:
+  %isnull = icmp eq i8* %ptr, null
+  br i1 %isnull, label %return, label %else
+
+else:
+  %lb = load i32, i32* %b
+  %add = add nsw i32 %lb, 1
+  store i32 %add, i32* %b
+  br label %return
+
+return:
+  %r = phi i8* [ %ptr, %else ], [ null, %entry ]
+  ret i8* %r
+}
+
+define i8* @simplify_phi_common_value_op1(i8* %ptr, i32* %b) {
+; CHECK-LABEL: @simplify_phi_common_value_op1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ISNULL:%.*]] = icmp eq i8* [[PTR:%.*]], null
+; CHECK-NEXT:    br i1 [[ISNULL]], label [[RETURN:%.*]], label [[ELSE:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LB:%.*]] = load i32, i32* [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LB]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i8* [[PTR]]
+;
+entry:
+  %isnull = icmp eq i8* %ptr, null
+  br i1 %isnull, label %return, label %else
+
+else:
+  %lb = load i32, i32* %b
+  %add = add i32 %lb, 1
+  store i32 %add, i32* %b
+  br label %return
+
+return:
+  %r = phi i8* [ null, %entry], [ %ptr, %else ]
+  ret i8* %r
+}
+
+define i8 @simplify_phi_multiple_constants(i8 %x, i32* %b) {
+; CHECK-LABEL: @simplify_phi_multiple_constants(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IS0:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT:    br i1 [[IS0]], label [[RETURN:%.*]], label [[ELSE1:%.*]]
+; CHECK:       else1:
+; CHECK-NEXT:    [[IS42:%.*]] = icmp eq i8 [[X]], 42
+; CHECK-NEXT:    br i1 [[IS42]], label [[RETURN]], label [[ELSE2:%.*]]
+; CHECK:       else2:
+; CHECK-NEXT:    [[LB:%.*]] = load i32, i32* [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LB]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i8 [[X]]
+;
+entry:
+  %is0 = icmp eq i8 %x, 0
+  br i1 %is0, label %return, label %else1
+
+else1:
+  %is42 = icmp eq i8 %x, 42
+  br i1 %is42, label %return, label %else2
+
+else2:
+  %lb = load i32, i32* %b
+  %add = add i32 %lb, 1
+  store i32 %add, i32* %b
+  br label %return
+
+return:
+  %r = phi i8 [ 0, %entry], [ %x, %else2 ], [ 42, %else1 ]
+  ret i8 %r
+}
+
+define i8* @simplify_phi_common_value_from_instruction(i8* %ptr_op, i32* %b, i32 %i) {
+; CHECK-LABEL: @simplify_phi_common_value_from_instruction(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i8, i8* [[PTR_OP:%.*]], i32 [[I:%.*]]
+; CHECK-NEXT:    [[ISNULL:%.*]] = icmp eq i8* [[PTR]], null
+; CHECK-NEXT:    br i1 [[ISNULL]], label [[RETURN:%.*]], label [[ELSE:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LB:%.*]] = load i32, i32* [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[LB]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[B]]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i8* [[PTR]]
+;
+entry:
+  %ptr = getelementptr i8, i8* %ptr_op, i32 %i
+  %isnull = icmp eq i8* %ptr, null
+  br i1 %isnull, label %return, label %else
+
+else:
+  %lb = load i32, i32* %b
+  %add = add nsw i32 %lb, 1
+  store i32 %add, i32* %b
+  br label %return
+
+return:
+  %r = phi i8* [ %ptr, %else ], [ null, %entry ]
+  ret i8* %r
+}
+

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/pointer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/pointer.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/pointer.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/pointer.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -correlated-propagation -S -o - %s | FileCheck %s
+
+; Testcase that checks that we don't end up in never-ending recursion resulting in
+; a segmentation fault. The checks below verify that nothing is changed.
+
+declare dso_local i16* @f2(i16* readnone returned) local_unnamed_addr
+
+define dso_local void @f3() local_unnamed_addr {
+; CHECK-LABEL: @f3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[CALL6:%.*]] = call i16* @f2(i16* [[CALL6]])
+; CHECK-NEXT:    br i1 false, label [[FOR_COND]], label [[FOR_COND3:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[C_0:%.*]] = phi i16* [ undef, [[ENTRY:%.*]] ], [ [[CALL6]], [[FOR_END:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_COND3]]
+; CHECK:       for.cond3:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.cond
+
+for.end:
+  %call6 = call i16* @f2(i16* %call6)
+  br i1 false, label %for.cond, label %for.cond3
+
+for.cond:
+  %c.0 = phi i16* [ undef, %entry ], [ %call6, %for.end ]
+  br label %for.cond3
+
+for.cond3:
+  ret void
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/pr35807.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/pr35807.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/pr35807.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/pr35807.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -correlated-propagation -S %s | FileCheck %s
+
+target triple = "x86_64-apple-darwin17.4.0"
+
+define void @patatino() {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT:    br i1 undef, label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br i1 undef, label [[BB40:%.*]], label [[BB22:%.*]]
+; CHECK:       bb7:
+; CHECK-NEXT:    br label [[BB14:%.*]]
+; CHECK:       bb12:
+; CHECK-NEXT:    br label [[BB14]]
+; CHECK:       bb14:
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 undef, undef
+; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i64 [[TMP20]], i64 0
+; CHECK-NEXT:    br i1 undef, label [[BB40]], label [[BB7:%.*]]
+; CHECK:       bb22:
+; CHECK-NEXT:    br label [[BB24:%.*]]
+; CHECK:       bb24:
+; CHECK-NEXT:    br label [[BB32:%.*]]
+; CHECK:       bb32:
+; CHECK-NEXT:    br i1 undef, label [[BB40]], label [[BB24]]
+; CHECK:       bb40:
+; CHECK-NEXT:    [[TMP41:%.*]] = phi i64 [ 4, [[BB4]] ], [ [[TMP20]], [[BB14]] ], [ undef, [[BB32]] ]
+; CHECK-NEXT:    ret void
+;
+  br i1 undef, label %bb3, label %bb4
+
+bb3:
+  br label %bb3
+
+bb4:
+  br i1 undef, label %bb40, label %bb22
+
+bb7:
+  br label %bb14
+
+bb12:
+  br label %bb14
+
+; This block is unreachable. Due to the non-standard definition of
+; dominance in LLVM where uses in unreachable blocks are dominated
+; by anything, it contains an instruction of the form
+; %def = OP %def, %something
+bb14:
+  %tmp19 = icmp sgt i32 undef, undef
+  %tmp20 = select i1 %tmp19, i64 %tmp20, i64 0
+  br i1 undef, label %bb40, label %bb7
+
+bb22:
+  br label %bb24
+
+bb24:
+  br label %bb32
+
+bb32:
+  br i1 undef, label %bb40, label %bb24
+
+bb40:
+  %tmp41 = phi i64 [ 4, %bb4 ], [ %tmp20, %bb14 ], [ undef, %bb32 ]
+  ret void
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/range.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/range.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/range.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/range.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,917 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -correlated-propagation -S < %s | FileCheck %s
+
+declare i32 @foo()
+
+define i32 @test1(i32 %a) nounwind {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A_OFF]], 8
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br i1 false, label [[END:%.*]], label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 2
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp ult i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, 7
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+}
+
+define i32 @test2(i32 %a) nounwind {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A_OFF]], 8
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br i1 false, label [[END:%.*]], label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 2
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp ult i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp ugt i32 %a, 15
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+}
+
+define i32 @test3(i32 %c) nounwind {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[C:%.*]], 2
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       if.end:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[C]], 3
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN2:%.*]], label [[IF_END8:%.*]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    br i1 true, label [[IF_THEN4:%.*]], label [[IF_END6:%.*]]
+; CHECK:       if.end6:
+; CHECK-NEXT:    ret i32 2
+; CHECK:       if.then4:
+; CHECK-NEXT:    ret i32 3
+; CHECK:       if.end8:
+; CHECK-NEXT:    ret i32 4
+;
+  %cmp = icmp slt i32 %c, 2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  ret i32 1
+
+if.end:
+  %cmp1 = icmp slt i32 %c, 3
+  br i1 %cmp1, label %if.then2, label %if.end8
+
+if.then2:
+  %cmp2 = icmp eq i32 %c, 2
+  br i1 %cmp2, label %if.then4, label %if.end6
+
+if.end6:
+  ret i32 2
+
+if.then4:
+  ret i32 3
+
+if.end8:
+  ret i32 4
+}
+
+define i32 @test4(i32 %c) nounwind {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 1, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB]]
+; CHECK-NEXT:    i32 4, label [[SW_BB]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    br i1 true, label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[RETURN:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 42, [[SW_DEFAULT]] ], [ 4, [[IF_THEN]] ], [ 9, [[IF_END]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  switch i32 %c, label %sw.default [
+  i32 1, label %sw.bb
+  i32 2, label %sw.bb
+  i32 4, label %sw.bb
+  ]
+
+sw.bb:
+  %cmp = icmp sge i32 %c, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  br label %return
+
+if.end:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 42, %sw.default ], [ 4, %if.then ], [ 9, %if.end ]
+  ret i32 %retval.0
+}
+
+define i1 @test5(i32 %c) nounwind {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[C:%.*]], 5
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[C]], 4
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_END]], label [[IF_END8:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret i1 true
+; CHECK:       if.end8:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[C]], 3
+; CHECK-NEXT:    [[OR:%.*]] = or i1 false, false
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+  %cmp = icmp slt i32 %c, 5
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %cmp1 = icmp eq i32 %c, 4
+  br i1 %cmp1, label %if.end, label %if.end8
+
+if.end:
+  ret i1 true
+
+if.end8:
+  %cmp2 = icmp eq i32 %c, 3
+  %cmp3 = icmp eq i32 %c, 4
+  %cmp4 = icmp eq i32 %c, 6
+  %or = or i1 %cmp3, %cmp4
+  ret i1 %cmp2
+}
+
+define i1 @test6(i32 %c) nounwind {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ule i32 [[C:%.*]], 7
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[C]], 6
+; CHECK-NEXT:    br i1 [[COND]], label [[SW_BB:%.*]], label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret i1 true
+; CHECK:       sw.bb:
+; CHECK-NEXT:    ret i1 true
+;
+  %cmp = icmp ule i32 %c, 7
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  switch i32 %c, label %if.end [
+  i32 6, label %sw.bb
+  i32 8, label %sw.bb
+  ]
+
+if.end:
+  ret i1 true
+
+sw.bb:
+  %cmp2 = icmp eq i32 %c, 6
+  ret i1 %cmp2
+}
+
+define i1 @test7(i32 %c) nounwind {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 6, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i32 7, label [[SW_BB]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb:
+; CHECK-NEXT:    ret i1 true
+; CHECK:       sw.default:
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[C]], 5
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[C]], 8
+; CHECK-NEXT:    [[OR:%.*]] = or i1 [[CMP5]], false
+; CHECK-NEXT:    [[OR2:%.*]] = or i1 false, [[CMP8]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  switch i32 %c, label %sw.default [
+  i32 6, label %sw.bb
+  i32 7, label %sw.bb
+  ]
+
+sw.bb:
+  ret i1 true
+
+sw.default:
+  %cmp5 = icmp eq i32 %c, 5
+  %cmp6 = icmp eq i32 %c, 6
+  %cmp7 = icmp eq i32 %c, 7
+  %cmp8 = icmp eq i32 %c, 8
+  %or = or i1 %cmp5, %cmp6
+  %or2 = or i1 %cmp7, %cmp8
+  ret i1 false
+}
+
+define i1 @test8(i64* %p) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[A:%.*]] = load i64, i64* [[P:%.*]], !range !0
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i64 [[A]], 0
+; CHECK-NEXT:    ret i1 false
+;
+  %a = load i64, i64* %p, !range !{i64 4, i64 255}
+  %res = icmp eq i64 %a, 0
+  ret i1 %res
+}
+
+define i1 @test9(i64* %p) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[A:%.*]] = load i64, i64* [[P:%.*]], !range !1
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i64 [[A]], 0
+; CHECK-NEXT:    ret i1 true
+;
+  %a = load i64, i64* %p, !range !{i64 0, i64 1}
+  %res = icmp eq i64 %a, 0
+  ret i1 %res
+}
+
+define i1 @test10(i64* %p) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[A:%.*]] = load i64, i64* [[P:%.*]], !range !2
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i64 [[A]], 0
+; CHECK-NEXT:    ret i1 false
+;
+  %a = load i64, i64* %p, !range !{i64 4, i64 8, i64 15, i64 20}
+  %res = icmp eq i64 %a, 0
+  ret i1 %res
+}
+
+ at g = external global i32
+
+define i1 @test11() {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[POSITIVE:%.*]] = load i32, i32* @g, !range !3
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[POSITIVE]], 1
+; CHECK-NEXT:    [[TEST:%.*]] = icmp sgt i32 [[ADD]], 0
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret i1 true
+;
+  %positive = load i32, i32* @g, !range !{i32 1, i32 2048}
+  %add = add i32 %positive, 1
+  %test = icmp sgt i32 %add, 0
+  br label %next
+
+next:
+  ret i1 %test
+}
+
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br i1 false, label [[END:%.*]], label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 2
+;
+  %cmp = icmp ult i32 %a, %b
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, -1
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+}
+
+define i32 @test12_swap(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12_swap(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br i1 false, label [[END:%.*]], label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 2
+;
+  %cmp = icmp ugt i32 %b, %a
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, -1
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+}
+
+; The same as @test12 but the second check is on the false path
+
+define i32 @test12_neg(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12_neg(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[ALIVE:%.*]] = icmp eq i32 [[A]], -1
+; CHECK-NEXT:    br i1 [[ALIVE]], label [[END:%.*]], label [[THEN]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 2
+;
+  %cmp = icmp ult i32 %a, %b
+  br i1 %cmp, label %then, label %else
+
+else:
+  %alive = icmp eq i32 %a, -1
+  br i1 %alive, label %end, label %then
+
+then:
+  ret i32 1
+
+end:
+  ret i32 2
+}
+
+; The same as @test12 but with signed comparison
+
+define i32 @test12_signed(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12_signed(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br i1 false, label [[END:%.*]], label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 2
+;
+  %cmp = icmp slt i32 %a, %b
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, 2147483647
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+}
+
+define i32 @test13(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A_OFF]], [[B:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br i1 false, label [[END:%.*]], label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 2
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp ult i32 %a.off, %b
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, 7
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+}
+
+define i32 @test13_swap(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13_swap(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[B:%.*]], [[A_OFF]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br i1 false, label [[END:%.*]], label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 2
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp ugt i32 %b, %a.off
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, 7
+  br i1 %dead, label %end, label %else
+
+else:
+  ret i32 1
+
+end:
+  ret i32 2
+}
+
+define i1 @test14_slt(i32 %a) {
+; CHECK-LABEL: @test14_slt(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A_OFF]], 8
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[RESULT:%.*]] = or i1 false, false
+; CHECK-NEXT:    ret i1 [[RESULT]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp slt i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead.1 = icmp eq i32 %a, -2147483641
+  %dead.2 = icmp eq i32 %a, 16
+  %result = or i1 %dead.1, %dead.2
+  ret i1 %result
+
+else:
+  ret i1 false
+}
+
+define i1 @test14_sle(i32 %a) {
+; CHECK-LABEL: @test14_sle(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[A_OFF]], 8
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[ALIVE:%.*]] = icmp eq i32 [[A]], 16
+; CHECK-NEXT:    [[RESULT:%.*]] = or i1 false, [[ALIVE]]
+; CHECK-NEXT:    ret i1 [[RESULT]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp sle i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, -2147483641
+  %alive = icmp eq i32 %a, 16
+  %result = or i1 %dead, %alive
+  ret i1 %result
+
+else:
+  ret i1 false
+}
+
+define i1 @test14_sgt(i32 %a) {
+; CHECK-LABEL: @test14_sgt(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A_OFF]], 8
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[RESULT:%.*]] = or i1 false, false
+; CHECK-NEXT:    ret i1 [[RESULT]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp sgt i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead.1 = icmp eq i32 %a, -2147483640
+  %dead.2 = icmp eq i32 %a, 16
+  %result = or i1 %dead.1, %dead.2
+  ret i1 %result
+
+else:
+  ret i1 false
+}
+
+define i1 @test14_sge(i32 %a) {
+; CHECK-LABEL: @test14_sge(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[A_OFF]], 8
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[ALIVE:%.*]] = icmp eq i32 [[A]], 16
+; CHECK-NEXT:    [[RESULT:%.*]] = or i1 false, [[ALIVE]]
+; CHECK-NEXT:    ret i1 [[RESULT]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp sge i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, -2147483640
+  %alive = icmp eq i32 %a, 16
+  %result = or i1 %dead, %alive
+  ret i1 %result
+
+else:
+  ret i1 false
+}
+
+define i1 @test14_ule(i32 %a) {
+; CHECK-LABEL: @test14_ule(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ule i32 [[A_OFF]], 8
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[ALIVE:%.*]] = icmp eq i32 [[A]], 16
+; CHECK-NEXT:    [[RESULT:%.*]] = or i1 false, [[ALIVE]]
+; CHECK-NEXT:    ret i1 [[RESULT]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp ule i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, 7
+  %alive = icmp eq i32 %a, 16
+  %result = or i1 %dead, %alive
+  ret i1 %result
+
+else:
+  ret i1 false
+}
+
+define i1 @test14_ugt(i32 %a) {
+; CHECK-LABEL: @test14_ugt(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[A_OFF]], 8
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[RESULT:%.*]] = or i1 false, false
+; CHECK-NEXT:    ret i1 [[RESULT]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp ugt i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead.1 = icmp eq i32 %a, 8
+  %dead.2 = icmp eq i32 %a, 16
+  %result = or i1 %dead.1, %dead.2
+  ret i1 %result
+
+else:
+  ret i1 false
+}
+
+define i1 @test14_uge(i32 %a) {
+; CHECK-LABEL: @test14_uge(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 [[A:%.*]], -8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i32 [[A_OFF]], 8
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[ALIVE:%.*]] = icmp eq i32 [[A]], 16
+; CHECK-NEXT:    [[RESULT:%.*]] = or i1 false, [[ALIVE]]
+; CHECK-NEXT:    ret i1 [[RESULT]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+  %a.off = add i32 %a, -8
+  %cmp = icmp uge i32 %a.off, 8
+  br i1 %cmp, label %then, label %else
+
+then:
+  %dead = icmp eq i32 %a, 8
+  %alive = icmp eq i32 %a, 16
+  %result = or i1 %dead, %alive
+  ret i1 %result
+
+else:
+  ret i1 false
+}
+
+ at limit = external global i32
+define i1 @test15(i32 %a) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[LIMIT:%.*]] = load i32, i32* @limit, !range !4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A:%.*]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 false
+;
+  %limit = load i32, i32* @limit, !range !{i32 0, i32 256}
+  %cmp = icmp ult i32 %a, %limit
+  br i1 %cmp, label %then, label %else
+
+then:
+  %result = icmp eq i32 %a, 255
+  ret i1 %result
+
+else:
+  ret i1 false
+}
+
+define i32 @test16(i8 %a) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    br label [[DISPATCH:%.*]]
+; CHECK:       dispatch:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[A]], 93
+; CHECK-NEXT:    br i1 [[CMP]], label [[TARGET93:%.*]], label [[DISPATCH]]
+; CHECK:       target93:
+; CHECK-NEXT:    ret i32 93
+;
+entry:
+  %b = zext i8 %a to i32
+  br label %dispatch
+
+dispatch:
+  %cmp = icmp eq i8 %a, 93
+  br i1 %cmp, label %target93, label %dispatch
+
+target93:
+  ret i32 %b
+}
+
+define i32 @test16_i1(i1 %a) {
+; CHECK-LABEL: @test16_i1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = zext i1 [[A:%.*]] to i32
+; CHECK-NEXT:    br label [[DISPATCH:%.*]]
+; CHECK:       dispatch:
+; CHECK-NEXT:    br i1 [[A]], label [[TRUE:%.*]], label [[DISPATCH]]
+; CHECK:       true:
+; CHECK-NEXT:    ret i32 1
+;
+entry:
+  %b = zext i1 %a to i32
+  br label %dispatch
+
+dispatch:
+  br i1 %a, label %true, label %dispatch
+
+true:
+  ret i32 %b
+}
+
+define i8 @test17(i8 %a) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[A:%.*]], 3
+; CHECK-NEXT:    br label [[DISPATCH:%.*]]
+; CHECK:       dispatch:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[A]], 93
+; CHECK-NEXT:    br i1 [[CMP]], label [[TARGET93:%.*]], label [[DISPATCH]]
+; CHECK:       target93:
+; CHECK-NEXT:    ret i8 96
+;
+entry:
+  %c = add i8 %a, 3
+  br label %dispatch
+
+dispatch:
+  %cmp = icmp eq i8 %a, 93
+  br i1 %cmp, label %target93, label %dispatch
+
+target93:
+  ret i8 %c
+}
+
+define i8 @test17_2(i8 %a) {
+; CHECK-LABEL: @test17_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[A:%.*]], [[A]]
+; CHECK-NEXT:    br label [[DISPATCH:%.*]]
+; CHECK:       dispatch:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[A]], 93
+; CHECK-NEXT:    br i1 [[CMP]], label [[TARGET93:%.*]], label [[DISPATCH]]
+; CHECK:       target93:
+; CHECK-NEXT:    ret i8 -70
+;
+entry:
+  %c = add i8 %a, %a
+  br label %dispatch
+
+dispatch:
+  %cmp = icmp eq i8 %a, 93
+  br i1 %cmp, label %target93, label %dispatch
+
+target93:
+  ret i8 %c
+}
+
+define i1 @test17_i1(i1 %a) {
+; CHECK-LABEL: @test17_i1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = and i1 [[A:%.*]], true
+; CHECK-NEXT:    br label [[DISPATCH:%.*]]
+; CHECK:       dispatch:
+; CHECK-NEXT:    br i1 [[A]], label [[TRUE:%.*]], label [[DISPATCH]]
+; CHECK:       true:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %c = and i1 %a, true
+  br label %dispatch
+
+dispatch:
+  br i1 %a, label %true, label %dispatch
+
+true:
+  ret i1 %c
+}
+
+define i32 @test18(i8 %a) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    br label [[DISPATCH:%.*]]
+; CHECK:       dispatch:
+; CHECK-NEXT:    switch i8 [[A]], label [[DISPATCH]] [
+; CHECK-NEXT:    i8 93, label [[TARGET93:%.*]]
+; CHECK-NEXT:    i8 -111, label [[DISPATCH]]
+; CHECK-NEXT:    ]
+; CHECK:       target93:
+; CHECK-NEXT:    ret i32 93
+;
+entry:
+  %b = zext i8 %a to i32
+  br label %dispatch
+
+dispatch:
+  switch i8 %a, label %dispatch [
+  i8 93, label %target93
+  i8 -111, label %dispatch
+  ]
+
+target93:
+  ret i32 %b
+}
+
+define i8 @test19(i8 %a) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[A:%.*]], 3
+; CHECK-NEXT:    br label [[DISPATCH:%.*]]
+; CHECK:       dispatch:
+; CHECK-NEXT:    switch i8 [[A]], label [[DISPATCH]] [
+; CHECK-NEXT:    i8 93, label [[TARGET93:%.*]]
+; CHECK-NEXT:    i8 -111, label [[DISPATCH]]
+; CHECK-NEXT:    ]
+; CHECK:       target93:
+; CHECK-NEXT:    ret i8 96
+;
+entry:
+  %c = add i8 %a, 3
+  br label %dispatch
+
+dispatch:
+  switch i8 %a, label %dispatch [
+  i8 93, label %target93
+  i8 -111, label %dispatch
+  ]
+
+target93:
+  ret i8 %c
+}
+
+; Negative test. Shouldn't be incorrectly optimized to "ret i1 false".
+
+define i1 @test20(i64 %a) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = and i64 [[A:%.*]], 7
+; CHECK-NEXT:    br label [[DISPATCH:%.*]]
+; CHECK:       dispatch:
+; CHECK-NEXT:    switch i64 [[A]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i64 0, label [[EXIT2:%.*]]
+; CHECK-NEXT:    i64 -2147483647, label [[EXIT2]]
+; CHECK-NEXT:    ]
+; CHECK:       default:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i64 [[B]], 0
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i1 [[C]]
+; CHECK:       exit2:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %b = and i64 %a, 7
+  br label %dispatch
+
+dispatch:
+  switch i64 %a, label %default [
+  i64 0, label %exit2
+  i64 -2147483647, label %exit2
+  ]
+
+default:
+  %c = icmp eq i64 %b, 0
+  br label %exit
+
+exit:
+  ret i1 %c
+
+exit2:
+  ret i1 false
+}
+
+define i1 @slt(i8 %a, i8 %b) {
+; CHECK-LABEL: @slt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %cmp = icmp slt i8 %a, %b
+  call void @llvm.assume(i1 %cmp)
+  %res = icmp slt i8 %a, 127
+  ret i1 %res
+}
+
+define i1 @sgt(i8 %a, i8 %b) {
+; CHECK-LABEL: @sgt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %cmp = icmp sgt i8 %a, %b
+  call void @llvm.assume(i1 %cmp)
+  %res = icmp sgt i8 %a, -128
+  ret i1 %res
+}
+
+define i1 @ult(i8 %a, i8 %b) {
+; CHECK-LABEL: @ult(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %cmp = icmp ult i8 %a, %b
+  call void @llvm.assume(i1 %cmp)
+  %res = icmp ult i8 %a, 255
+  ret i1 %res
+}
+
+define i1 @ugt(i8 %a, i8 %b) {
+; CHECK-LABEL: @ugt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %cmp = icmp ugt i8 %a, %b
+  call void @llvm.assume(i1 %cmp)
+  %res = icmp ugt i8 %a, 0
+  ret i1 %res
+}
+
+declare void @llvm.assume(i1)

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/sdiv.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/sdiv.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/sdiv.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,97 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+; CHECK-LABEL: @test0(
+define void @test0(i32 %n) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %j.0 = phi i32 [ %n, %entry ], [ %div, %for.body ]
+  %cmp = icmp sgt i32 %j.0, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+; CHECK: %div1 = udiv i32 %j.0, 2
+  %div = sdiv i32 %j.0, 2
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; CHECK-LABEL: @test1(
+define void @test1(i32 %n) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %j.0 = phi i32 [ %n, %entry ], [ %div, %for.body ]
+  %cmp = icmp sgt i32 %j.0, -2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+; CHECK: %div = sdiv i32 %j.0, 2
+  %div = sdiv i32 %j.0, 2
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; CHECK-LABEL: @test2(
+define void @test2(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 1
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %div1 = udiv i32 %n, 2 
+  %div = sdiv i32 %n, 2
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Looping case where the loop has exactly one block.
+; At the point of the sdiv, we know that %a is always greater than 0
+; because of the guard before it, so we can transform it to a udiv.
+declare void @llvm.experimental.guard(i1,...)
+; CHECK-LABEL: @test4
+define void @test4(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: udiv i32 %a, 6
+  %a = phi i32 [ %n, %entry ], [ %div, %loop ]
+  %cond = icmp sgt i32 %a, 4
+  call void(i1,...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  %div = sdiv i32 %a, 6
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; same test as above with assume instead of guard.
+declare void @llvm.assume(i1)
+; CHECK-LABEL: @test5
+define void @test5(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: udiv i32 %a, 6
+  %a = phi i32 [ %n, %entry ], [ %div, %loop ]
+  %cond = icmp sgt i32 %a, 4
+  call void @llvm.assume(i1 %cond)
+  %div = sdiv i32 %a, 6
+  %loopcond = icmp sgt i32 %div, 8
+  br i1 %loopcond, label %loop, label %exit
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/select.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/select.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/select.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,218 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+; CHECK-LABEL: @simple(
+define i8 @simple(i1) {
+entry:
+  %s = select i1 %0, i8 0, i8 1
+  br i1 %0, label %then, label %else
+
+then:
+; CHECK: ret i8 0
+  %a = phi i8 [ %s, %entry ]
+  ret i8 %a
+
+else:
+; CHECK: ret i8 1
+  %b = phi i8 [ %s, %entry ]
+  ret i8 %b
+}
+
+; CHECK-LABEL: @loop(
+define void @loop(i32) {
+entry:
+  br label %loop
+
+loop:
+  %idx = phi i32 [ %0, %entry ], [ %sel, %loop ]
+; CHECK: %idx = phi i32 [ %0, %entry ], [ %2, %loop ]
+  %1 = icmp eq i32 %idx, 0
+  %2 = add i32 %idx, -1
+  %sel = select i1 %1, i32 0, i32 %2
+  br i1 %1, label %out, label %loop
+
+out:
+  ret void
+}
+
+; CHECK-LABEL: @not_correlated(
+define i8 @not_correlated(i1, i1) {
+entry:
+  %s = select i1 %0, i8 0, i8 1
+  br i1 %1, label %then, label %else
+
+then:
+; CHECK: ret i8 %s
+  %a = phi i8 [ %s, %entry ]
+  ret i8 %a
+
+else:
+; CHECK: ret i8 %s
+  %b = phi i8 [ %s, %entry ]
+  ret i8 %b
+}
+
+ at c = global i32 0, align 4
+ at b = global i32 0, align 4
+
+; CHECK-LABEL: @PR23752(
+define i32 @PR23752() {
+entry:
+  br label %for.body
+
+for.body:
+  %phi = phi i32 [ 0, %entry ], [ %sel, %for.body ]
+  %sel = select i1 icmp sgt (i32* @b, i32* @c), i32 %phi, i32 1
+  %cmp = icmp ne i32 %sel, 1
+  br i1 %cmp, label %for.body, label %if.end
+
+; CHECK:      %[[sel:.*]] = select i1 icmp sgt (i32* @b, i32* @c), i32 0, i32 1
+; CHECK-NEXT: %[[cmp:.*]] = icmp ne i32 %[[sel]], 1
+; CHECK-NEXT: br i1 %[[cmp]]
+
+if.end:
+  ret i32 %sel
+; CHECK: ret i32 1
+}
+
+define i1 @test1(i32* %p, i1 %unknown) {
+; CHECK-LABEL: @test1
+  %pval = load i32, i32* %p
+  %cmp1 = icmp slt i32 %pval, 255
+  br i1 %cmp1, label %next, label %exit
+
+next:
+  %min = select i1 %unknown, i32 %pval, i32 5
+  ;; TODO: This pointless branch shouldn't be necessary
+  br label %next2
+next2:
+; CHECK-LABEL: next2:
+; CHECK: ret i1 false
+  %res = icmp eq i32 %min, 255
+  ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+  ret i1 true
+}
+
+; Check that we take a conservative meet
+define i1 @test2(i32* %p, i32 %qval, i1 %unknown) {
+; CHECK-LABEL: test2
+  %pval = load i32, i32* %p
+  %cmp1 = icmp slt i32 %pval, 255
+  br i1 %cmp1, label %next, label %exit
+
+next:
+  %min = select i1 %unknown, i32 %pval, i32 %qval
+  ;; TODO: This pointless branch shouldn't be necessary
+  br label %next2
+next2:
+; CHECK-LABEL: next2
+; CHECK: ret i1 %res
+  %res = icmp eq i32 %min, 255
+  ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+  ret i1 true
+}
+
+; Same as @test2, but for the opposite select input
+define i1 @test3(i32* %p, i32 %qval, i1 %unknown) {
+; CHECK-LABEL: test3
+  %pval = load i32, i32* %p
+  %cmp1 = icmp slt i32 %pval, 255
+  br i1 %cmp1, label %next, label %exit
+
+next:
+  %min = select i1 %unknown, i32 %qval, i32 %pval
+  ;; TODO: This pointless branch shouldn't be necessary
+  br label %next2
+next2:
+; CHECK-LABEL: next2
+; CHECK: ret i1 %res
+  %res = icmp eq i32 %min, 255
+  ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+  ret i1 true
+}
+
+; Conflicting constants (i.e. isOverdefined result)
+; NOTE: Using doubles in this version is a bit of a hack.  This
+; is to get around the fact that all integers (including constants
+; and non-constants) are actually represented as constant-ranges.
+define i1 @test4(i32* %p, i32 %qval, i1 %unknown) {
+; CHECK-LABEL: test4
+  %pval = load i32, i32* %p
+  %cmp1 = icmp slt i32 %pval, 255
+  br i1 %cmp1, label %next, label %exit
+
+next:
+  %min = select i1 %unknown, double 1.0, double 0.0
+  ;; TODO: This pointless branch shouldn't be necessary
+  br label %next2
+next2:
+; CHECK-LABEL: next2
+; CHECK: ret i1 %res
+  %res = fcmp oeq double %min, 300.0
+  ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+  ret i1 true
+}
+
+;; Using the condition to clamp the result
+;; 
+
+define i1 @test5(i32* %p, i1 %unknown) {
+; CHECK-LABEL: @test5
+  %pval = load i32, i32* %p
+  %cmp1 = icmp slt i32 %pval, 255
+  br i1 %cmp1, label %next, label %exit
+
+next:
+  %cond = icmp sgt i32 %pval, 0
+  %min = select i1 %cond, i32 %pval, i32 5
+  ;; TODO: This pointless branch shouldn't be necessary
+  br label %next2
+next2:
+; CHECK-LABEL: next2:
+; CHECK: ret i1 false
+  %res = icmp eq i32 %min, -1
+  ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+  ret i1 true
+}
+
+define i1 @test6(i32* %p, i1 %unknown) {
+; CHECK-LABEL: @test6
+  %pval = load i32, i32* %p
+  %cmp1 = icmp ult i32 %pval, 255
+  br i1 %cmp1, label %next, label %exit
+
+next:
+  %cond = icmp ne i32 %pval, 254
+  %sel = select i1 %cond, i32 %pval, i32 1
+  ;; TODO: This pointless branch shouldn't be necessary
+  br label %next2
+next2:
+; CHECK-LABEL: next2:
+; CHECK: ret i1 true
+  %res = icmp slt i32 %sel, 254
+  ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+  ret i1 true
+}

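For reference, the select tests above check that the pass folds a select through a phi node when the incoming edge fixes the select's condition, while leaving uncorrelated selects (@not_correlated) untouched. A minimal sketch of the idea behind @simple, not part of this patch:

  %s = select i1 %c, i8 0, i8 1
  br i1 %c, label %then, label %else
  ; on the %then edge %c is known true, so a phi of %s there folds to i8 0;
  ; on the %else edge it folds to i8 1.
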
Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/srem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/srem.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/srem.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/srem.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7m-arm-none-eabi"
+
+define void @h(i32* nocapture %p, i32 %x) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: @h(
+; CHECK: urem
+
+  %cmp = icmp sgt i32 %x, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %rem2 = srem i32 %x, 10
+  store i32 %rem2, i32* %p, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; looping case where loop has exactly one block
+; at the point of srem, we know that %a is always greater than 0,
+; because of the assume before it, so we can transform it to urem.
+declare void @llvm.assume(i1)
+; CHECK-LABEL: @test4
+define void @test4(i32 %n) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+; CHECK: urem i32 %a, 6
+  %a = phi i32 [ %n, %entry ], [ %rem, %loop ]
+  %cond = icmp sgt i32 %a, 4
+  call void @llvm.assume(i1 %cond)
+  %rem = srem i32 %a, 6
+  %loopcond = icmp sgt i32 %rem, 8
+  br i1 %loopcond, label %loop, label %exit
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/udiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/udiv.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/udiv.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/udiv.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,101 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+; Check that debug locations are preserved. For more info see:
+;   https://llvm.org/docs/SourceLevelDebugging.html#fixing-errors
+; RUN: opt < %s -enable-debugify -correlated-propagation -S 2>&1 | \
+; RUN:   FileCheck %s -check-prefix=DEBUG
+; DEBUG: CheckModuleDebugify: PASS
+
+; CHECK-LABEL: @test_nop
+define void @test_nop(i32 %n) {
+; CHECK: udiv i32 %n, 100
+  %div = udiv i32 %n, 100
+  ret void
+}
+
+; CHECK-LABEL: @test1(
+define void @test1(i32 %n) {
+entry:
+  %cmp = icmp ule i32 %n, 65535
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: udiv i16
+  %div = udiv i32 %n, 100
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test2(
+define void @test2(i32 %n) {
+entry:
+  %cmp = icmp ule i32 %n, 65536
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: udiv i32 %n, 100
+  %div = udiv i32 %n, 100
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test3(
+define void @test3(i32 %m, i32 %n) {
+entry:
+  %cmp1 = icmp ult i32 %m, 65535
+  %cmp2 = icmp ult i32 %n, 65535
+  %cmp = and i1 %cmp1, %cmp2
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: udiv i16
+  %div = udiv i32 %m, %n
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test4(
+define void @test4(i32 %m, i32 %n) {
+entry:
+  %cmp1 = icmp ult i32 %m, 65535
+  %cmp2 = icmp ule i32 %n, 65536
+  %cmp = and i1 %cmp1, %cmp2
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: udiv i32 %m, %n
+  %div = udiv i32 %m, %n
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test5
+define void @test5(i32 %n) {
+  %trunc = and i32 %n, 65535
+  ; CHECK: udiv i16
+  %div = udiv i32 %trunc, 42
+  ret void
+}
+
+; CHECK-LABEL: @test6
+define void @test6(i32 %n) {
+entry:
+  %cmp = icmp ule i32 %n, 255
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: udiv i8
+  %div = sdiv i32 %n, 100
+  br label %exit
+
+exit:
+  ret void
+}

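For reference, the udiv tests above exercise the width-narrowing rewrite: when the pass can prove that both operands fit into a narrower power-of-two type (i16 when the values are at most 65535, i8 when at most 255), the division is performed at that width instead. A rough sketch of the expansion, not part of this patch and with illustrative value names:

  ; assuming %n is known to be <= 65535
  %n.lhs = trunc i32 %n to i16
  %div16 = udiv i16 %n.lhs, 100
  %div   = zext i16 %div16 to i32
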
Added: llvm/trunk/test/Transforms/CorrelatedValuePropagation/urem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CorrelatedValuePropagation/urem.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CorrelatedValuePropagation/urem.ll (added)
+++ llvm/trunk/test/Transforms/CorrelatedValuePropagation/urem.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,101 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+; CHECK-LABEL: @test_nop
+define void @test_nop(i32 %n) {
+; CHECK: udiv i32 %n, 100
+  %div = udiv i32 %n, 100
+  ret void
+}
+
+; CHECK-LABEL: @test1(
+define void @test1(i32 %n) {
+entry:
+  %cmp = icmp ule i32 %n, 65535
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: urem i16
+  %div = urem i32 %n, 100
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test2(
+define void @test2(i32 %n) {
+entry:
+  %cmp = icmp ule i32 %n, 65536
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: urem i32 %n, 100
+  %div = urem i32 %n, 100
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test3(
+define void @test3(i32 %m, i32 %n) {
+entry:
+  %cmp1 = icmp ult i32 %m, 65535
+  %cmp2 = icmp ult i32 %n, 65535
+  %cmp = and i1 %cmp1, %cmp2
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: urem i16
+  %div = urem i32 %m, %n
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test4(
+define void @test4(i32 %m, i32 %n) {
+entry:
+  %cmp1 = icmp ult i32 %m, 65535
+  %cmp2 = icmp ule i32 %n, 65536
+  %cmp = and i1 %cmp1, %cmp2
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: urem i32 %m, %n
+  %div = urem i32 %m, %n
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @test5
+define void @test5(i32 %n) {
+  %trunc = and i32 %n, 63
+  ; CHECK: urem i8
+  %div = urem i32 %trunc, 42
+  ret void
+}
+
+; CHECK-LABEL: @test6
+define void @test6(i32 %n) {
+entry:
+  %cmp = icmp ule i32 %n, 255
+  br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: urem i8
+  %div = srem i32 %n, 100
+  br label %exit
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @non_power_of_2
+define void @non_power_of_2(i24 %n) {
+  %div = urem i24 %n, 42
+  ret void
+}

Added: llvm/trunk/test/Transforms/CrossDSOCFI/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CrossDSOCFI/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CrossDSOCFI/basic.ll (added)
+++ llvm/trunk/test/Transforms/CrossDSOCFI/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,81 @@
+; RUN: opt -S -cross-dso-cfi < %s | FileCheck %s
+; RUN: opt -S -passes=cross-dso-cfi < %s | FileCheck %s
+
+; CHECK:     define void @__cfi_check(i64 %[[TYPE:.*]], i8* %[[ADDR:.*]], i8* %[[DATA:.*]]) align 4096
+; CHECK:     switch i64 %[[TYPE]], label %[[FAIL:.*]] [
+; CHECK-NEXT:   i64 111, label %[[L1:.*]]
+; CHECK-NEXT:   i64 222, label %[[L2:.*]]
+; CHECK-NEXT:   i64 333, label %[[L3:.*]]
+; CHECK-NEXT:   i64 444, label %[[L4:.*]]
+; CHECK-NEXT: {{]$}}
+
+; CHECK:     [[EXIT:.*]]:
+; CHECK-NEXT:   ret void
+
+; CHECK:     [[FAIL]]:
+; CHECK-NEXT:   call void @__cfi_check_fail(i8* %[[DATA]], i8* %[[ADDR]])
+; CHECK-NEXT:   br label %[[EXIT]]
+
+; CHECK:     [[L1]]:
+; CHECK-NEXT:   call i1 @llvm.type.test(i8* %[[ADDR]], metadata i64 111)
+; CHECK-NEXT:   br {{.*}} label %[[EXIT]], label %[[FAIL]]
+
+; CHECK:     [[L2]]:
+; CHECK-NEXT:   call i1 @llvm.type.test(i8* %[[ADDR]], metadata i64 222)
+; CHECK-NEXT:   br {{.*}} label %[[EXIT]], label %[[FAIL]]
+
+; CHECK:     [[L3]]:
+; CHECK-NEXT:   call i1 @llvm.type.test(i8* %[[ADDR]], metadata i64 333)
+; CHECK-NEXT:   br {{.*}} label %[[EXIT]], label %[[FAIL]]
+
+; CHECK:     [[L4]]:
+; CHECK-NEXT:   call i1 @llvm.type.test(i8* %[[ADDR]], metadata i64 444)
+; CHECK-NEXT:   br {{.*}} label %[[EXIT]], label %[[FAIL]]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at _ZTV1A = constant i8 0, !type !4, !type !5
+ at _ZTV1B = constant i8 0, !type !4, !type !5, !type !6, !type !7
+
+define signext i8 @f11() !type !0 !type !1 {
+entry:
+  ret i8 1
+}
+
+define signext i8 @f12() !type !0 !type !1 {
+entry:
+  ret i8 2
+}
+
+define signext i8 @f13() !type !0 !type !1 {
+entry:
+  ret i8 3
+}
+
+define i32 @f21() !type !2 !type !3 {
+entry:
+  ret i32 4
+}
+
+define i32 @f22() !type !2 !type !3 {
+entry:
+  ret i32 5
+}
+
+define weak_odr hidden void @__cfi_check_fail(i8*, i8*) {
+entry:
+  ret void
+}
+
+!llvm.module.flags = !{!8}
+
+!0 = !{i64 0, !"_ZTSFcvE"}
+!1 = !{i64 0, i64 111}
+!2 = !{i64 0, !"_ZTSFivE"}
+!3 = !{i64 0, i64 222}
+!4 = !{i64 16, !"_ZTS1A"}
+!5 = !{i64 16, i64 333}
+!6 = !{i64 16, !"_ZTS1B"}
+!7 = !{i64 16, i64 444}
+!8 = !{i32 4, !"Cross-DSO CFI", i32 1}

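For reference, the checks above spell out the __cfi_check function that the cross-DSO CFI pass synthesizes: a switch over the incoming type id with one case per id listed in the module's !type metadata, each case validating the target address with llvm.type.test and falling back to a block that calls __cfi_check_fail when the test fails or the id is unknown. A rough sketch of a single case, with illustrative label names, not part of this patch:

  id111:
    %ok = call i1 @llvm.type.test(i8* %addr, metadata i64 111)
    br i1 %ok, label %exit, label %fail
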
Added: llvm/trunk/test/Transforms/CrossDSOCFI/cfi_functions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CrossDSOCFI/cfi_functions.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CrossDSOCFI/cfi_functions.ll (added)
+++ llvm/trunk/test/Transforms/CrossDSOCFI/cfi_functions.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; Test that types referenced in ThinLTO-style !cfi.functions are known to __cfi_check.
+; RUN: opt -S -cross-dso-cfi < %s | FileCheck %s
+; RUN: opt -S -passes=cross-dso-cfi < %s | FileCheck %s
+
+; CHECK:      define void @__cfi_check(
+; CHECK:        switch i64
+; CHECK-NEXT:     i64 1234, label
+; CHECK-NEXT:     i64 5678, label
+; CHECK-NEXT:   ]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+!cfi.functions = !{!0, !1}
+!llvm.module.flags = !{!6}
+
+!0 = !{!"f", i8 0, !2, !4}
+!1 = !{!"g", i8 1, !3, !5}
+!2 = !{i64 0, !"typeid1"}
+!3 = !{i64 0, !"typeid2"}
+!4 = !{i64 0, i64 1234}
+!5 = !{i64 0, i64 5678}
+!6 = !{i32 4, !"Cross-DSO CFI", i32 1}

Added: llvm/trunk/test/Transforms/CrossDSOCFI/thumb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CrossDSOCFI/thumb.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CrossDSOCFI/thumb.ll (added)
+++ llvm/trunk/test/Transforms/CrossDSOCFI/thumb.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -mtriple=armv7-linux-android -S -cross-dso-cfi < %s | FileCheck --check-prefix=THUMB %s
+; RUN: opt -mtriple=thumbv7-linux-android -S -cross-dso-cfi < %s | FileCheck --check-prefix=THUMB %s
+; RUN: opt -mtriple=i386-linux -S -cross-dso-cfi < %s | FileCheck --check-prefix=NOTHUMB %s
+; RUN: opt -mtriple=x86_64-linux -S -cross-dso-cfi < %s | FileCheck --check-prefix=NOTHUMB %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define signext i8 @f() !type !0 !type !1 {
+entry:
+  ret i8 1
+}
+
+!llvm.module.flags = !{!2}
+
+!0 = !{i64 0, !"_ZTSFcvE"}
+!1 = !{i64 0, i64 111}
+!2 = !{i32 4, !"Cross-DSO CFI", i32 1}
+
+; THUMB: define void @__cfi_check({{.*}} #[[A:.*]] align 4096
+; THUMB: attributes #[[A]] = { {{.*}}"target-features"="+thumb-mode"
+
+; NOTHUMB: define void @__cfi_check({{.*}} align 4096

Added: llvm/trunk/test/Transforms/DCE/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DCE/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DCE/basic.ll (added)
+++ llvm/trunk/test/Transforms/DCE/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt -debugify -dce -S < %s | FileCheck %s
+; RUN: opt -passes='module(debugify),function(dce)' -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test
+define void @test() {
+  %add = add i32 1, 2
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 1, metadata [[add:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 2, DW_OP_stack_value))
+  %sub = sub i32 %add, 1
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 1, metadata [[sub:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 2, DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value))
+; CHECK-NEXT: ret void
+  ret void
+}
+
+; CHECK: [[add]] = !DILocalVariable
+; CHECK: [[sub]] = !DILocalVariable

Added: llvm/trunk/test/Transforms/DCE/calls-errno.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DCE/calls-errno.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DCE/calls-errno.ll (added)
+++ llvm/trunk/test/Transforms/DCE/calls-errno.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,99 @@
+; RUN: opt < %s -dce -S | FileCheck %s
+
+declare double @acos(double) nounwind
+declare double @asin(double) nounwind
+declare double @atan(double) nounwind
+declare double @atan2(double, double) nounwind
+declare double @ceil(double) nounwind
+declare double @cos(double) nounwind
+declare double @cosh(double) nounwind
+declare double @exp(double) nounwind
+declare double @exp2(double) nounwind
+declare double @fabs(double) nounwind
+declare double @floor(double) nounwind
+declare double @fmod(double, double) nounwind
+declare double @log(double) nounwind
+declare double @log10(double) nounwind
+declare double @pow(double, double) nounwind
+declare double @sin(double) nounwind
+declare double @sinh(double) nounwind
+declare double @sqrt(double) nounwind
+declare double @tan(double) nounwind
+declare double @tanh(double) nounwind
+
+declare float @acosf(float) nounwind
+declare float @asinf(float) nounwind
+declare float @atanf(float) nounwind
+declare float @atan2f(float, float) nounwind
+declare float @ceilf(float) nounwind
+declare float @cosf(float) nounwind
+declare float @coshf(float) nounwind
+declare float @expf(float) nounwind
+declare float @exp2f(float) nounwind
+declare float @fabsf(float) nounwind
+declare float @floorf(float) nounwind
+declare float @fmodf(float, float) nounwind
+declare float @logf(float) nounwind
+declare float @log10f(float) nounwind
+declare float @powf(float, float) nounwind
+declare float @sinf(float) nounwind
+declare float @sinhf(float) nounwind
+declare float @sqrtf(float) nounwind
+declare float @tanf(float) nounwind
+declare float @tanhf(float) nounwind
+
+define void @T() {
+entry:
+; CHECK-LABEL: @T(
+; CHECK-NEXT: entry:
+
+; log(0) produces a pole error
+; CHECK-NEXT: %log1 = call double @log(double 0.000000e+00)
+  %log1 = call double @log(double 0.000000e+00)
+
+; log(-1) produces a domain error
+; CHECK-NEXT: %log2 = call double @log(double -1.000000e+00)
+  %log2 = call double @log(double -1.000000e+00)
+
+; log(1) is 0
+  %log3 = call double @log(double 1.000000e+00)
+
+; exp(100) is roughly 2.6e+43
+  %exp1 = call double @exp(double 1.000000e+02)
+
+; exp(1000) is a range error
+; CHECK-NEXT: %exp2 = call double @exp(double 1.000000e+03)
+  %exp2 = call double @exp(double 1.000000e+03)
+
+; cos(0) is 1
+  %cos1 = call double @cos(double 0.000000e+00)
+
+; cos(inf) is a domain error
+; CHECK-NEXT: %cos2 = call double @cos(double 0x7FF0000000000000)
+  %cos2 = call double @cos(double 0x7FF0000000000000)
+
+; cos(0) nobuiltin may have side effects 
+; CHECK-NEXT: %cos3 = call double @cos(double 0.000000e+00)
+  %cos3 = call double @cos(double 0.000000e+00) nobuiltin
+
+; cos(1) strictfp sets FP status flags
+; CHECK-NEXT: %cos4 = call double @cos(double 1.000000e+00)
+  %cos4 = call double @cos(double 1.000000e+00) strictfp
+
+; pow(inf, 1) is inf
+  %pow1 = call double @pow(double 0x7FF0000000000000, double 1.000000e+00)
+
+; pow(0, -1) is a pole error
+; FIXME: It fails on mingw host. Suppress checking.
+; %pow2 = call double @pow(double 0.000000e+00, double -1.000000e+00)
+
+; fmod(inf, nan) is nan
+  %fmod1 = call double @fmod(double 0x7FF0000000000000, double 0x7FF0000000000001)
+
+; fmod(inf, 1) is a domain error
+; CHECK-NEXT: %fmod2 = call double @fmod(double 0x7FF0000000000000, double 1.000000e+00)
+  %fmod2 = call double @fmod(double 0x7FF0000000000000, double 1.000000e+00)
+
+; CHECK-NEXT: ret void
+  ret void
+}

Added: llvm/trunk/test/Transforms/DCE/guards.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DCE/guards.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DCE/guards.ll (added)
+++ llvm/trunk/test/Transforms/DCE/guards.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+; RUN: opt -dce -S < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1,...)
+
+define void @f(i32 %val) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT: ret void
+  %val2 = add i32 %val, 1
+  call void(i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"(i32 %val2) ]
+  ret void
+}

Added: llvm/trunk/test/Transforms/DCE/int_sideeffect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DCE/int_sideeffect.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DCE/int_sideeffect.ll (added)
+++ llvm/trunk/test/Transforms/DCE/int_sideeffect.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,12 @@
+; RUN: opt -S < %s -dce | FileCheck %s
+
+declare void @llvm.sideeffect()
+
+; Don't DCE llvm.sideeffect calls.
+
+; CHECK-LABEL: dce
+; CHECK: call void @llvm.sideeffect()
+define void @dce() {
+    call void @llvm.sideeffect()
+    ret void
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+; RUN: opt < %s -deadargelim -disable-output
+
+define internal void @build_delaunay({ i32 }* sret  %agg.result) {
+        ret void
+}
+
+define void @test() {
+        call void @build_delaunay( { i32 }* sret  null )
+        ret void
+}
+

Added: llvm/trunk/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+; RUN: opt < %s -deadargelim -S | grep "@test("
+; RUN: opt < %s -deadargelim -S | not grep dead
+
+define internal i32 @test(i32 %X, i32 %dead) {
+	ret i32 %X
+}
+
+define i32 @caller() {
+	%A = call i32 @test(i32 123, i32 456)
+	ret i32 %A
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,12 @@
+; RUN: opt < %s -deadargelim -S | not grep "ret i32 0"
+; PR1735
+
+define internal i32 @test(i32 %A, ...) { 
+	ret i32 %A
+}
+
+define i32 @foo() {
+	%A = call i32(i32, ...) @test(i32 1)
+	ret i32 %A
+}
+

Added: llvm/trunk/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+%struct = type { }
+
+ at g = global i8 0
+
+; CHECK: define internal void @foo(i8 signext %y) [[NUW:#[0-9]+]]
+
+define internal zeroext i8 @foo(i8* inreg %p, i8 signext %y, ... )  nounwind {
+  store i8 %y, i8* @g
+  ret i8 0
+}
+
+define i32 @bar() {
+; CHECK: call void @foo(i8 signext 1) [[NUW]]
+  %A = call zeroext i8(i8*, i8, ...) @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
+  ret i32 0
+}
+
+; CHECK: attributes [[NUW]] = { nounwind }

Added: llvm/trunk/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt < %s -deadargelim -S | grep byval
+
+	%struct.point = type { double, double }
+ at pts = global [4 x %struct.point] [ %struct.point { double 1.000000e+00, double 2.000000e+00 }, %struct.point { double 3.000000e+00, double 4.000000e+00 }, %struct.point { double 5.000000e+00, double 6.000000e+00 }, %struct.point { double 7.000000e+00, double 8.000000e+00 } ], align 32		; <[4 x %struct.point]*> [#uses=1]
+
+define internal i32 @va1(i32 %nargs, ...) {
+entry:
+	%pi = alloca %struct.point		; <%struct.point*> [#uses=0]
+	%args = alloca i8*		; <i8**> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%args1 = bitcast i8** %args to i8*		; <i8*> [#uses=1]
+	call void @llvm.va_start( i8* %args1 )
+	%args41 = bitcast i8** %args to i8*		; <i8*> [#uses=1]
+	call void @llvm.va_end( i8* %args41 )
+	ret i32 undef
+}
+
+declare void @llvm.va_start(i8*) nounwind 
+
+declare void @llvm.va_end(i8*) nounwind 
+
+define i32 @main() {
+entry:
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%tmp = getelementptr [4 x %struct.point], [4 x %struct.point]* @pts, i32 0, i32 0		; <%struct.point*> [#uses=1]
+	%tmp1 = call i32 (i32, ...) @va1( i32 1, %struct.point* byval  %tmp ) nounwind 		; <i32> [#uses=0]
+	call void @exit( i32 0 ) noreturn nounwind 
+	unreachable
+}
+
+declare void @exit(i32) noreturn nounwind 

Added: llvm/trunk/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt < %s -deadargelim -die -S > %t
+; RUN: cat %t | grep 123
+
+; This test tries to catch wrongful removal of return values for a specific case
+; that was breaking llvm-gcc builds.
+
+; This function has a live return value, it is used by @alive.
+define internal i32 @test5() {
+  ret i32 123 
+}
+
+; This function doesn't use the return value of @test5 and tries to lure DAE into
+; marking @test5's return value dead, even though only this particular call ignores it.
+define i32 @dead() {
+  %DEAD = call i32 @test5()
+  ret i32 0
+}
+
+; This function ensures the retval of @test5 is live.
+define i32 @alive() {
+  %LIVE = call i32 @test5()
+  ret i32 %LIVE
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt < %s -deadargelim | llvm-dis
+; PR3807
+
+define internal { i32, i32 } @foo() {
+  ret {i32,i32} {i32 42, i32 4}
+}
+
+define i32 @bar() personality i32 (...)* @__gxx_personality_v0 {
+  %x = invoke {i32,i32} @foo() to label %T unwind label %T2
+T:
+  %y = extractvalue {i32,i32} %x, 1
+  ret i32 %y
+T2:
+  %exn = landingpad {i8*, i32}
+            cleanup
+  unreachable
+}
+
+define i32 @bar2() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+  %x = invoke {i32,i32} @foo() to label %T unwind label %T2
+T:
+  %PN = phi i32 [0, %entry]
+  %y = extractvalue {i32,i32} %x, 1
+  ret i32 %y
+T2:
+  %exn = landingpad {i8*, i32}
+            cleanup
+  unreachable
+}
+
+declare i32 @__gxx_personality_v0(...)

Added: llvm/trunk/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,78 @@
+; RUN: opt -S -deadargelim < %s | FileCheck %s
+
+ at .str = private constant [1 x i8] zeroinitializer, align 1 ; <[1 x i8]*> [#uses=1]
+
+define i8* @vfs_addname(i8* %name, i32 %len, i32 %hash, i32 %flags) nounwind ssp {
+entry:
+  call void @llvm.dbg.value(metadata i8* %name, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
+  call void @llvm.dbg.value(metadata i32 %len, metadata !10, metadata !DIExpression()), !dbg !DILocation(scope: !1)
+  call void @llvm.dbg.value(metadata i32 %hash, metadata !11, metadata !DIExpression()), !dbg !DILocation(scope: !1)
+  call void @llvm.dbg.value(metadata i32 %flags, metadata !12, metadata !DIExpression()), !dbg !DILocation(scope: !1)
+; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !{{[0-9]+}}
+  %0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
+  ret i8* %0, !dbg !13
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) noinline nounwind ssp {
+entry:
+  call void @llvm.dbg.value(metadata i8* %name, metadata !15, metadata !DIExpression()), !dbg !DILocation(scope: !16)
+  call void @llvm.dbg.value(metadata i32 %len, metadata !20, metadata !DIExpression()), !dbg !DILocation(scope: !16)
+  call void @llvm.dbg.value(metadata i32 %hash, metadata !21, metadata !DIExpression()), !dbg !DILocation(scope: !16)
+  call void @llvm.dbg.value(metadata i8 %extra, metadata !22, metadata !DIExpression()), !dbg !DILocation(scope: !16)
+  call void @llvm.dbg.value(metadata i32 %flags, metadata !23, metadata !DIExpression()), !dbg !DILocation(scope: !16)
+  %0 = icmp eq i32 %hash, 0, !dbg !24             ; <i1> [#uses=1]
+  br i1 %0, label %bb, label %bb1, !dbg !24
+
+bb:                                               ; preds = %entry
+  br label %bb2, !dbg !26
+
+bb1:                                              ; preds = %entry
+  br label %bb2, !dbg !27
+
+bb2:                                              ; preds = %bb1, %bb
+  %.0 = phi i8* [ getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0), %bb ], [ %name, %bb1 ] ; <i8*> [#uses=1]
+  ret i8* %.0, !dbg !27
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone
+
+; CHECK: attributes #0 = { nounwind ssp }
+; CHECK: attributes #1 = { nounwind readnone speculatable }
+; CHECK: attributes #2 = { noinline nounwind ssp }
+; CHECK: attributes [[NUW]] = { nounwind }
+
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!30}
+!0 = !DILocalVariable(name: "name", line: 8, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "vfs_addname", linkageName: "vfs_addname", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, file: !28, scope: !2, type: !4)
+!2 = !DIFile(filename: "tail.c", directory: "/Users/echeng/LLVM/radars/r7927803/")
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: FullDebug, file: !28, enums: !29, retainedTypes: !29)
+!4 = !DISubroutineType(types: !5)
+!5 = !{!6, !6, !9, !9, !9}
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !28, scope: !2, baseType: !7)
+!7 = !DIDerivedType(tag: DW_TAG_const_type, size: 8, align: 8, file: !28, scope: !2, baseType: !8)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!10 = !DILocalVariable(name: "len", line: 9, arg: 2, scope: !1, file: !2, type: !9)
+!11 = !DILocalVariable(name: "hash", line: 10, arg: 3, scope: !1, file: !2, type: !9)
+!12 = !DILocalVariable(name: "flags", line: 11, arg: 4, scope: !1, file: !2, type: !9)
+!13 = !DILocation(line: 13, scope: !14)
+!14 = distinct !DILexicalBlock(line: 12, column: 0, file: !28, scope: !1)
+!15 = !DILocalVariable(name: "name", line: 17, arg: 1, scope: !16, file: !2, type: !6)
+!16 = distinct !DISubprogram(name: "add_name_internal", linkageName: "add_name_internal", line: 22, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, file: !28, scope: !2, type: !17)
+!17 = !DISubroutineType(types: !18)
+!18 = !{!6, !6, !9, !9, !19, !9}
+!19 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
+!20 = !DILocalVariable(name: "len", line: 18, arg: 2, scope: !16, file: !2, type: !9)
+!21 = !DILocalVariable(name: "hash", line: 19, arg: 3, scope: !16, file: !2, type: !9)
+!22 = !DILocalVariable(name: "extra", line: 20, arg: 4, scope: !16, file: !2, type: !19)
+!23 = !DILocalVariable(name: "flags", line: 21, arg: 5, scope: !16, file: !2, type: !9)
+!24 = !DILocation(line: 23, scope: !25)
+!25 = distinct !DILexicalBlock(line: 22, column: 0, file: !28, scope: !16)
+!26 = !DILocation(line: 24, scope: !25)
+!27 = !DILocation(line: 26, scope: !25)
+!28 = !DIFile(filename: "tail.c", directory: "/Users/echeng/LLVM/radars/r7927803/")
+!29 = !{}
+!30 = !{i32 1, !"Debug Info Version", i32 3}

Added: llvm/trunk/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/2013-05-17-VarargsAndBlockAddress.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt %s -deadargelim -S | FileCheck %s
+
+
+ at block_addr = global i8* blockaddress(@varargs_func, %l1)
+; CHECK: @block_addr = global i8* blockaddress(@varargs_func, %l1)
+
+
+; This function is referenced by a "blockaddress" constant but it is
+; not address-taken, so the pass should be able to remove its unused
+; varargs.
+
+define internal i32 @varargs_func(i8* %addr, ...) {
+  indirectbr i8* %addr, [ label %l1, label %l2 ]
+l1:
+  ret i32 1
+l2:
+  ret i32 2
+}
+; CHECK: define internal i32 @varargs_func(i8* %addr) {
+
+define i32 @caller(i8* %addr) {
+  %r = call i32 (i8*, ...) @varargs_func(i8* %addr)
+  ret i32 %r
+}
+; CHECK: %r = call i32 @varargs_func(i8* %addr)

Added: llvm/trunk/test/Transforms/DeadArgElim/aggregates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/aggregates.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/aggregates.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/aggregates.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,186 @@
+; RUN: opt -S -deadargelim %s | FileCheck %s
+
+; Case 0: the basic example: an entire aggregate use is returned, but it's
+; actually only used in ways we can eliminate. We gain benefit from analysing
+; the "use" and applying its results to all sub-values.
+
+; CHECK-LABEL: define internal void @agguse_dead()
+
+define internal { i32, i32 } @agguse_dead() {
+  ret { i32, i32 } { i32 0, i32 1 }
+}
+
+define internal { i32, i32 } @test_agguse_dead() {
+  %val = call { i32, i32 } @agguse_dead()
+  ret { i32, i32 } %val
+}
+
+
+
+; Case 1: an opaque use of the aggregate exists (in this case dead). Otherwise
+; only one value is used, so function can be simplified.
+
+; CHECK-LABEL: define internal i32 @rets_independent_if_agguse_dead()
+; CHECK: [[RET:%.*]] = extractvalue { i32, i32 } { i32 0, i32 1 }, 1
+; CHECK: ret i32 [[RET]]
+
+define internal { i32, i32 } @rets_independent_if_agguse_dead() {
+  ret { i32, i32 } { i32 0, i32 1 }
+}
+
+define internal { i32, i32 } @test_rets_independent_if_agguse_dead(i1 %tst) {
+  %val = call { i32, i32 } @rets_independent_if_agguse_dead()
+  br i1 %tst, label %use_1, label %use_aggregate
+
+use_1:
+  ; This use can be classified as applying only to ret 1.
+  %val0 = extractvalue { i32, i32 } %val, 1
+  call void @callee(i32 %val0)
+  ret { i32, i32 } undef
+
+use_aggregate:
+  ; This use is assumed to apply to both 0 and 1.
+  ret { i32, i32 } %val
+}
+
+; Case 2: an opaque use of the aggregate exists (in this case *live*). Other
+; uses shouldn't matter.
+
+; CHECK-LABEL: define internal { i32, i32 } @rets_live_agguse()
+; CHECK: ret { i32, i32 } { i32 0, i32 1 }
+
+define internal { i32, i32 } @rets_live_agguse() {
+  ret { i32, i32} { i32 0, i32 1 }
+}
+
+define { i32, i32 } @test_rets_live_aggues(i1 %tst) {
+  %val = call { i32, i32 } @rets_live_agguse()
+  br i1 %tst, label %use_1, label %use_aggregate
+
+use_1:
+  ; This use can be classified as applying only to ret 1.
+  %val0 = extractvalue { i32, i32 } %val, 1
+  call void @callee(i32 %val0)
+  ret { i32, i32 } undef
+
+use_aggregate:
+  ; This use is assumed to apply to both 0 and 1.
+  ret { i32, i32 } %val
+}
+
+declare void @callee(i32)
+
+; Case 3: the insertvalue meant %in was live if ret-slot-1 was, but we were only
+; tracking multiple ret-slots for struct types. So %in was eliminated
+; incorrectly.
+
+; CHECK-LABEL: define internal [2 x i32] @array_rets_have_multiple_slots(i32 %in)
+
+define internal [2 x i32] @array_rets_have_multiple_slots(i32 %in) {
+  %ret = insertvalue [2 x i32] undef, i32 %in, 1
+  ret [2 x i32] %ret
+}
+
+define [2 x i32] @test_array_rets_have_multiple_slots() {
+  %res = call [2 x i32] @array_rets_have_multiple_slots(i32 42)
+  ret [2 x i32] %res
+}
+
+; Case 4: we can remove some retvals from the array. It's nice to produce an
+; array again having done so (rather than converting it to a struct).
+
+; CHECK-LABEL: define internal [2 x i32] @can_shrink_arrays()
+; CHECK: [[VAL0:%.*]] = extractvalue [3 x i32] [i32 42, i32 43, i32 44], 0
+; CHECK: [[RESTMP:%.*]] = insertvalue [2 x i32] undef, i32 [[VAL0]], 0
+; CHECK: [[VAL2:%.*]] = extractvalue [3 x i32] [i32 42, i32 43, i32 44], 2
+; CHECK: [[RES:%.*]] = insertvalue [2 x i32] [[RESTMP]], i32 [[VAL2]], 1
+; CHECK: ret [2 x i32] [[RES]]
+
+; CHECK-LABEL: define void @test_can_shrink_arrays()
+
+define internal [3 x i32] @can_shrink_arrays() {
+  ret [3 x i32] [i32 42, i32 43, i32 44]
+}
+
+define void @test_can_shrink_arrays() {
+  %res = call [3 x i32] @can_shrink_arrays()
+
+  %res.0 = extractvalue [3 x i32] %res, 0
+  call void @callee(i32 %res.0)
+
+  %res.2 = extractvalue [3 x i32] %res, 2
+  call void @callee(i32 %res.2)
+
+  ret void
+}
+
+; Case 5: %in gets passed directly to the return. It should be marked as
+; used if *any* of the return values are, not just if value 0 is.
+
+; CHECK-LABEL: define internal i32 @ret_applies_to_all({ i32, i32 } %in)
+; CHECK: [[RET:%.*]] = extractvalue { i32, i32 } %in, 1
+; CHECK: ret i32 [[RET]]
+
+define internal {i32, i32} @ret_applies_to_all({i32, i32} %in) {
+  ret {i32, i32} %in
+}
+
+define i32 @test_ret_applies_to_all() {
+  %val = call {i32, i32} @ret_applies_to_all({i32, i32} {i32 42, i32 43})
+  %ret = extractvalue {i32, i32} %val, 1
+  ret i32 %ret
+}
+
+; Case 6: When considering @mid, the return instruction has sub-value 0
+; unconditionally live, but 1 only conditionally live. Since at that level we're
+; applying the results to the whole of %res, this means %res is live and cannot
+; be reduced. There is scope for further optimisation here (though not visible
+; in this test-case).
+
+; CHECK-LABEL: define internal { i8*, i32 } @inner()
+
+define internal {i8*, i32} @mid() {
+  %res = call {i8*, i32} @inner()
+  %intval = extractvalue {i8*, i32} %res, 1
+  %tst = icmp eq i32 %intval, 42
+  br i1 %tst, label %true, label %true
+
+true:
+  ret {i8*, i32} %res
+}
+
+define internal {i8*, i32} @inner() {
+  ret {i8*, i32} {i8* null, i32 42}
+}
+
+define internal i8 @outer() {
+  %res = call {i8*, i32} @mid()
+  %resptr = extractvalue {i8*, i32} %res, 0
+
+  %val = load i8, i8* %resptr
+  ret i8 %val
+}
+
+define internal { i32 } @agg_ret() {
+entry:
+  unreachable
+}
+
+; CHECK-LABEL: define void @PR24906
+; CHECK: %[[invoke:.*]] = invoke i32 @agg_ret()
+; CHECK: %[[oldret:.*]] = insertvalue { i32 } undef, i32 %[[invoke]], 0
+; CHECK: phi { i32 } [ %[[oldret]],
+define void @PR24906() personality i32 (i32)* undef {
+entry:
+  %tmp2 = invoke { i32 } @agg_ret()
+          to label %bb3 unwind label %bb4
+
+bb3:
+  %tmp3 = phi { i32 } [ %tmp2, %entry ]
+  unreachable
+
+bb4:
+  %tmp4 = landingpad { i8*, i32 }
+          cleanup
+  unreachable
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/allocsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/allocsize.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/allocsize.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/allocsize.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+; PR36867
+
+; CHECK-LABEL: @MagickMallocAligned
+; CHECK-NOT: allocsize
+define internal i64 @MagickMallocAligned(i64 %DEADARG1, i64 %s) allocsize(1) {
+        ret i64 %s
+}
+
+define i64 @NeedsArg(i64 %s) {
+	%c = call i64 @MagickMallocAligned(i64 0, i64 %s)
+	ret i64 %c
+}
+
+define i64 @Test2(i64 %s) {
+	%c = call i64 @MagickMallocAligned(i64 0, i64 %s) allocsize(1)
+	ret i64 %c
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/basictest.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/basictest.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/basictest.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -deadargelim -S | not grep DEADARG
+
+; test - an obviously dead argument
+define internal i32 @test(i32 %v, i32 %DEADARG1, i32* %p) {
+        store i32 %v, i32* %p
+        ret i32 %v
+}
+
+; hardertest - an argument which is only used by a call of a function with a 
+; dead argument.
+define internal i32 @hardertest(i32 %DEADARG2) {
+        %p = alloca i32         ; <i32*> [#uses=1]
+        %V = call i32 @test( i32 5, i32 %DEADARG2, i32* %p )            ; <i32> [#uses=1]
+        ret i32 %V
+}
+
+; evenhardertest - recursive dead argument...
+define internal void @evenhardertest(i32 %DEADARG3) {
+        call void @evenhardertest( i32 %DEADARG3 )
+        ret void
+}
+
+define internal void @needarg(i32 %TEST) {
+        call i32 @needarg2( i32 %TEST )         ; <i32>:1 [#uses=0]
+        ret void
+}
+
+define internal i32 @needarg2(i32 %TEST) {
+        ret i32 %TEST
+}
+
+define internal void @needarg3(i32 %TEST3) {
+        call void @needarg( i32 %TEST3 )
+        ret void
+}
+

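For reference, on inputs like the one above -deadargelim rewrites the signatures of internal functions to drop arguments that are never used (including arguments that are only forwarded into other dead arguments, as in @hardertest and @evenhardertest) and updates every call site to match. The test only asserts that no DEADARG name survives; roughly, @test would end up as the following sketch:

  define internal i32 @test(i32 %v, i32* %p) {
    store i32 %v, i32* %p
    ret i32 %v
  }
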
Added: llvm/trunk/test/Transforms/DeadArgElim/call_profile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/call_profile.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/call_profile.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/call_profile.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -deadargelim -S < %s | FileCheck %s
+
+; Checks that !prof metadata is correct in deadargelim.
+
+define void @caller() #0 {
+; CHECK: call void @test_vararg(), !prof ![[PROF:[0-9]]]
+; CHECK: call void @test(), !prof ![[PROF]]
+  call void (i32, ...) @test_vararg(i32 1), !prof !0
+  call void @test(i32 1), !prof !0
+  ret void
+}
+
+define internal void @test_vararg(i32, ...) #1 {
+  ret void
+}
+
+define internal void @test(i32 %a) #1 {
+  ret void
+}
+
+; CHECK:![[PROF]] = !{!"branch_weights", i32 30}
+!0 = !{!"branch_weights", i32 30}

Added: llvm/trunk/test/Transforms/DeadArgElim/canon.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/canon.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/canon.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/canon.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; This test shows a few canonicalizations made by deadargelim
+; RUN: opt < %s -deadargelim -S > %t
+; This test should remove {} and replace it with void
+; RUN: cat %t | grep "define internal void @test"
+; This test should replace the {i32} return value with just i32
+; RUN: cat %t | grep "define internal i32 @test2"
+
+define internal {} @test() {
+  ret {} undef
+}
+
+define internal {i32} @test2() {
+  ret {i32} undef
+}
+
+define void @caller() {
+  call {} @test()
+  %X = call {i32} @test2()
+  %Y = extractvalue {i32} %X, 0
+  call void @user(i32 %Y, {i32} %X)
+  ret void
+}
+
+declare void @user(i32, {i32})

Added: llvm/trunk/test/Transforms/DeadArgElim/comdat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/comdat.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/comdat.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/comdat.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,14 @@
+; RUN: opt -S < %s -deadargelim | FileCheck %s
+
+$f = comdat any
+
+define void @f() comdat {
+  call void @g(i32 0)
+  ret void
+}
+
+define internal void @g(i32 %dead) comdat($f) {
+  ret void
+}
+
+; CHECK: define internal void @g() comdat($f) {

Added: llvm/trunk/test/Transforms/DeadArgElim/dbginfo-preserve-dbgloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/dbginfo-preserve-dbgloc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/dbginfo-preserve-dbgloc.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/dbginfo-preserve-dbgloc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,135 @@
+; RUN: opt -deadargelim -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.Channel = type { i32, i32 }
+
+; Function Attrs: nounwind uwtable
+define void @f2(i32 %m, i32 %n) #0 !dbg !7 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %m, metadata !12, metadata !DIExpression()), !dbg !21
+  call void @llvm.dbg.value(metadata i32 %n, metadata !13, metadata !DIExpression()), !dbg !22
+  call void @llvm.dbg.value(metadata %struct.Channel* null, metadata !14, metadata !DIExpression()), !dbg !23
+  %call = call %struct.Channel* (...) @foo(), !dbg !24
+  call void @llvm.dbg.value(metadata %struct.Channel* %call, metadata !14, metadata !DIExpression()), !dbg !23
+  %cmp = icmp sgt i32 %m, 3, !dbg !25
+  br i1 %cmp, label %if.then, label %if.end, !dbg !27
+
+if.then:                                          ; preds = %entry
+  %call1 = call zeroext i1 @f1(i1 zeroext true, %struct.Channel* %call), !dbg !28
+  br label %if.end, !dbg !28
+
+if.end:                                           ; preds = %if.then, %entry
+  %cmp2 = icmp sgt i32 %n, %m, !dbg !29
+  br i1 %cmp2, label %if.then3, label %if.end5, !dbg !31
+
+if.then3:                                         ; preds = %if.end
+  %call4 = call zeroext i1 @f1(i1 zeroext false, %struct.Channel* %call), !dbg !32
+  br label %if.end5, !dbg !32
+
+if.end5:                                          ; preds = %if.then3, %if.end
+  ret void, !dbg !33
+}
+
+declare %struct.Channel* @foo(...) local_unnamed_addr #1
+
+; Function Attrs: noinline nounwind uwtable
+define internal zeroext i1 @f1(i1 zeroext %is_y, %struct.Channel* %str) #4 !dbg !34 {
+entry:
+  %frombool = zext i1 %is_y to i8
+; CHECK: call void @llvm.dbg.value(metadata i1 %is_y, metadata !39, metadata !DIExpression()), !dbg !42
+  call void @llvm.dbg.value(metadata i1 %is_y, metadata !39, metadata !DIExpression()), !dbg !42
+; CHECK: call void @llvm.dbg.value(metadata %struct.Channel* %str, metadata !40, metadata !DIExpression()), !dbg !43
+  call void @llvm.dbg.value(metadata %struct.Channel* %str, metadata !40, metadata !DIExpression()), !dbg !43
+  call void @llvm.dbg.value(metadata %struct.Channel* null, metadata !41, metadata !DIExpression()), !dbg !44
+  %tobool = icmp ne %struct.Channel* %str, null, !dbg !45
+  br i1 %tobool, label %if.end, label %if.then, !dbg !47
+
+if.then:                                          ; preds = %entry
+  call void (...) @baa(), !dbg !48
+  br label %cleanup, !dbg !50
+
+if.end:                                           ; preds = %entry
+  %call = call %struct.Channel* (...) @foo(), !dbg !51
+  call void @llvm.dbg.value(metadata %struct.Channel* %call, metadata !41, metadata !DIExpression()), !dbg !44
+  %tobool1 = trunc i8 %frombool to i1, !dbg !52
+  br i1 %tobool1, label %if.then2, label %if.end3, !dbg !56
+
+if.then2:                                         ; preds = %if.end
+  call void (...) @baa(), !dbg !57
+  br label %cleanup, !dbg !56
+
+if.end3:                                          ; preds = %if.end
+  br label %cleanup, !dbg !56
+
+cleanup:                                          ; preds = %if.end3, %if.then2, %if.then
+  %retval.0 = phi i1 [ false, %if.then2 ], [ true, %if.end3 ], [ false, %if.then ]
+  ret i1 %retval.0, !dbg !56
+}
+
+declare void @baa(...) local_unnamed_addr #1
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #3
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 7.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "test.c", directory: "/dir")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 7.0.0"}
+!7 = distinct !DISubprogram(name: "f2", scope: !1, file: !1, line: 31, type: !8, isLocal: false, isDefinition: true, scopeLine: 32, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !11)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10, !10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !{!12, !13, !14}
+!12 = !DILocalVariable(name: "m", arg: 1, scope: !7, file: !1, line: 31, type: !10)
+!13 = !DILocalVariable(name: "n", arg: 2, scope: !7, file: !1, line: 31, type: !10)
+!14 = !DILocalVariable(name: "str3", scope: !7, file: !1, line: 33, type: !15)
+!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64)
+!16 = !DIDerivedType(tag: DW_TAG_typedef, name: "channel", file: !1, line: 6, baseType: !17)
+!17 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Channel", file: !1, line: 3, size: 64, elements: !18)
+!18 = !{!19, !20}
+!19 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !17, file: !1, line: 4, baseType: !10, size: 32)
+!20 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !17, file: !1, line: 5, baseType: !10, size: 32, offset: 32)
+!21 = !DILocation(line: 31, column: 13, scope: !7)
+!22 = !DILocation(line: 31, column: 20, scope: !7)
+!23 = !DILocation(line: 33, column: 11, scope: !7)
+!24 = !DILocation(line: 34, column: 9, scope: !7)
+!25 = !DILocation(line: 36, column: 8, scope: !26)
+!26 = distinct !DILexicalBlock(scope: !7, file: !1, line: 36, column: 6)
+!27 = !DILocation(line: 36, column: 6, scope: !7)
+!28 = !DILocation(line: 37, column: 3, scope: !26)
+!29 = !DILocation(line: 39, column: 8, scope: !30)
+!30 = distinct !DILexicalBlock(scope: !7, file: !1, line: 39, column: 6)
+!31 = !DILocation(line: 39, column: 6, scope: !7)
+!32 = !DILocation(line: 40, column: 3, scope: !30)
+!33 = !DILocation(line: 41, column: 1, scope: !7)
+!34 = distinct !DISubprogram(name: "f1", scope: !1, file: !1, line: 12, type: !35, isLocal: true, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !38)
+!35 = !DISubroutineType(types: !36)
+!36 = !{!37, !37, !15}
+!37 = !DIBasicType(name: "_Bool", size: 8, encoding: DW_ATE_boolean)
+!38 = !{!39, !40, !41}
+!39 = !DILocalVariable(name: "is_y", arg: 1, scope: !34, file: !1, line: 12, type: !37)
+!40 = !DILocalVariable(name: "str", arg: 2, scope: !34, file: !1, line: 12, type: !15)
+!41 = !DILocalVariable(name: "str2", scope: !34, file: !1, line: 14, type: !15)
+!42 = !DILocation(line: 12, column: 21, scope: !34)
+!43 = !DILocation(line: 12, column: 36, scope: !34)
+!44 = !DILocation(line: 14, column: 11, scope: !34)
+!45 = !DILocation(line: 16, column: 7, scope: !46)
+!46 = distinct !DILexicalBlock(scope: !34, file: !1, line: 16, column: 6)
+!47 = !DILocation(line: 16, column: 6, scope: !34)
+!48 = !DILocation(line: 17, column: 3, scope: !49)
+!49 = distinct !DILexicalBlock(scope: !46, file: !1, line: 16, column: 11)
+!50 = !DILocation(line: 18, column: 3, scope: !49)
+!51 = !DILocation(line: 21, column: 9, scope: !34)
+!52 = !DILocation(line: 23, column: 6, scope: !34)
+!53 = !DILocation(line: 24, column: 3, scope: !54)
+!54 = distinct !DILexicalBlock(scope: !55, file: !1, line: 23, column: 11)
+!55 = distinct !DILexicalBlock(scope: !34, file: !1, line: 23, column: 6)
+!56 = !DILocation(line: 25, column: 3, scope: !54)
+!57 = !DILocation(line: 28, column: 2, scope: !34)

Added: llvm/trunk/test/Transforms/DeadArgElim/dbginfo-update-dbgval-local.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/dbginfo-update-dbgval-local.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/dbginfo-update-dbgval-local.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/dbginfo-update-dbgval-local.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt -deadargelim -S < %s | FileCheck %s
+
+; Verify that the dbg.value intrinsics that use the dead argument and return
+; value are marked as undef to indicate that the values are optimized out.
+
+; Reproducer for PR23260.
+
+; CHECK-LABEL: define internal void @bar()
+; CHECK: call void @llvm.dbg.value(metadata i32 undef, metadata ![[LOCAL1:[0-9]+]]
+; CHECK: call void @sink()
+
+; Function Attrs: alwaysinline nounwind uwtable
+define internal i32 @bar(i32 %deadarg) #1 !dbg !10 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %deadarg, metadata !15, metadata !DIExpression()), !dbg !17
+  call void @sink(), !dbg !17
+  ret i32 123, !dbg !17
+}
+
+; CHECK-LABEL: define void @foo()
+; CHECK: call void @bar()
+; CHECK: call void @llvm.dbg.value(metadata i32 undef, metadata ![[LOCAL2:[0-9]+]]
+; CHECK: call void @bar()
+
+; Function Attrs: nounwind uwtable
+define void @foo() #0 !dbg !6 {
+entry:
+  %deadret = call i32 @bar(i32 0), !dbg !9
+  call void @llvm.dbg.value(metadata i32 %deadret, metadata !16, metadata !DIExpression()), !dbg !9
+  call i32 @bar(i32 1), !dbg !9
+  ret void, !dbg !9
+}
+
+declare void @sink() local_unnamed_addr
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { alwaysinline nounwind uwtable }
+attributes #2 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+; CHECK: ![[LOCAL1]] = !DILocalVariable(name: "local1"
+; CHECK: ![[LOCAL2]] = !DILocalVariable(name: "local2"
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "pr23260.c", directory: "/")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 8.0.0"}
+!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null}
+!9 = !DILocation(line: 4, column: 3, scope: !6)
+!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !14)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!13, !13}
+!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!14 = !{!15}
+!15 = !DILocalVariable(name: "local1", arg: 1, scope: !10, file: !1, line: 2, type: !13)
+!16 = !DILocalVariable(name: "local2", arg: 1, scope: !6, file: !1, line: 2, type: !13)
+!17 = !DILocation(line: 2, column: 52, scope: !10)

Added: llvm/trunk/test/Transforms/DeadArgElim/dbginfo-update-dbgval.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/dbginfo-update-dbgval.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/dbginfo-update-dbgval.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/dbginfo-update-dbgval.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,86 @@
+; RUN: opt -deadargelim -S < %s | FileCheck %s
+;test.c
+;int s;
+;
+;void f2(int k) __attribute__((noinline)) {
+; s++;
+; k = s;
+;}
+;
+;void f() __attribute__((noinline)) {
+; f2(4);
+;}
+;
+;int main()
+;{
+; f();
+; return 0;
+;}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+ at s = common dso_local local_unnamed_addr global i32 0, align 4, !dbg !0
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @f2(i32 %k) local_unnamed_addr !dbg !11 {
+entry:
+; CHECK: call void @llvm.dbg.value(metadata i32 undef, metadata !15, metadata !DIExpression()), !dbg !16
+  call void @llvm.dbg.value(metadata i32 %k, metadata !15, metadata !DIExpression()), !dbg !16
+  %0 = load i32, i32* @s, align 4, !dbg !17
+  %inc = add nsw i32 %0, 1, !dbg !17
+  store i32 %inc, i32* @s, align 4, !dbg !17
+  call void @llvm.dbg.value(metadata i32* @s, metadata !15, metadata !DIExpression(DW_OP_deref)), !dbg !16
+  ret void, !dbg !18
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @f() local_unnamed_addr !dbg !19 {
+entry:
+; CHECK: tail call void @f2(i32 undef), !dbg !22
+  tail call void @f2(i32 4), !dbg !22
+  ret void, !dbg !23
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr !dbg !24 {
+entry:
+  tail call void @f(), !dbg !27
+  ret i32 0, !dbg !28
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "s", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 8.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
+!3 = !DIFile(filename: "test.c", directory: "/dir")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{!"clang version 7.0.0"}
+!11 = distinct !DISubprogram(name: "f2", scope: !3, file: !3, line: 3, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !2, retainedNodes: !14)
+!12 = !DISubroutineType(types: !13)
+!13 = !{null, !6}
+!14 = !{!15}
+!15 = !DILocalVariable(name: "k", arg: 1, scope: !11, file: !3, line: 3, type: !6)
+!16 = !DILocation(line: 3, column: 13, scope: !11)
+!17 = !DILocation(line: 4, column: 3, scope: !11)
+!18 = !DILocation(line: 6, column: 1, scope: !11)
+!19 = distinct !DISubprogram(name: "f", scope: !3, file: !3, line: 8, type: !20, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, unit: !2, retainedNodes: !4)
+!20 = !DISubroutineType(types: !21)
+!21 = !{null}
+!22 = !DILocation(line: 9, column: 2, scope: !19)
+!23 = !DILocation(line: 10, column: 1, scope: !19)
+!24 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 12, type: !25, isLocal: false, isDefinition: true, scopeLine: 12, isOptimized: true, unit: !2, retainedNodes: !4)
+!25 = !DISubroutineType(types: !26)
+!26 = !{!6}
+!27 = !DILocation(line: 13, column: 2, scope: !24)
+!28 = !DILocation(line: 14, column: 2, scope: !24)

Added: llvm/trunk/test/Transforms/DeadArgElim/dbginfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/dbginfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/dbginfo.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/dbginfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,70 @@
+; RUN: opt -deadargelim -S < %s | FileCheck %s
+; PR14016
+
+; Built with clang (then manually running -mem2reg with opt) from the following source:
+; static void f1(int, ...) {
+; }
+;
+; void f2() {
+;   f1(1);
+; }
+
+; Test varargs removal and removal of a traditional dead arg together, to
+; cover both the basic functionality and a particular wrinkle: the
+; function->debug info mapping must be updated on each removal so that it is
+; still accurate when it is used again for the next removal.
+
+; CHECK: define internal void @_ZL2f1iz({{.*}} !dbg [[SP:![0-9]+]]
+; CHECK: [[SP]] = distinct !DISubprogram(name: "f1"
+
+; Check that debug info metadata for subprograms stores pointers to
+; updated LLVM functions.
+
+; Function Attrs: uwtable
+define void @_Z2f2v() #0 !dbg !4 {
+entry:
+  call void (i32, ...) @_ZL2f1iz(i32 1), !dbg !15
+  ret void, !dbg !16
+}
+
+; Function Attrs: nounwind uwtable
+define internal void @_ZL2f1iz(i32, ...) #1 !dbg !8 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %0, metadata !17, metadata !18), !dbg !19
+  ret void, !dbg !20
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "dbg.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 4, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "dbg.cpp", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1iz", line: 1, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !9, retainedNodes: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null, !11, null}
+!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!12 = !{i32 2, !"Dwarf Version", i32 4}
+!13 = !{i32 2, !"Debug Info Version", i32 3}
+!14 = !{!"clang version 3.6.0 "}
+!15 = !DILocation(line: 5, column: 3, scope: !4)
+!16 = !DILocation(line: 6, column: 1, scope: !4)
+!17 = !DILocalVariable(name: "", line: 1, arg: 1, scope: !8, file: !5, type: !11)
+!18 = !DIExpression()
+!19 = !DILocation(line: 1, column: 19, scope: !8)
+!20 = !DILocation(line: 2, column: 1, scope: !8)

Added: llvm/trunk/test/Transforms/DeadArgElim/dead_vaargs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/dead_vaargs.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/dead_vaargs.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/dead_vaargs.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+define i32 @bar(i32 %A) {
+  call void (i32, ...) @thunk(i32 %A, i64 47, double 1.000000e+00)
+  %a = call i32 (i32, ...) @has_vastart(i32 %A, i64 47, double 1.000000e+00)
+  %b = call i32 (i32, ...) @no_vastart( i32 %A, i32 %A, i32 %A, i32 %A, i64 47, double 1.000000e+00 )
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+; CHECK-LABEL: define i32 @bar
+; CHECK: call void (i32, ...) @thunk(i32 %A, i64 47, double 1.000000e+00)
+; CHECK: call i32 (i32, ...) @has_vastart(i32 %A, i64 47, double 1.000000e+00)
+; CHECK: call i32 @no_vastart(i32 %A)
+
+declare void @thunk_target(i32 %X, ...)
+
+define internal void @thunk(i32 %X, ...) {
+  musttail call void(i32, ...) @thunk_target(i32 %X, ...)
+  ret void
+}
+; CHECK-LABEL: define internal void @thunk(i32 %X, ...)
+; CHECK: musttail call void (i32, ...) @thunk_target(i32 %X, ...)
+
+define internal i32 @has_vastart(i32 %X, ...) {
+  %valist = alloca i8
+  call void @llvm.va_start(i8* %valist)
+  ret i32 %X
+}
+; CHECK-LABEL: define internal i32 @has_vastart(i32 %X, ...)
+
+declare void @llvm.va_start(i8*)
+
+define internal i32 @no_vastart(i32 %X, ...) {
+  ret i32 %X
+}
+; CHECK-LABEL: define internal i32 @no_vastart(i32 %X)

Added: llvm/trunk/test/Transforms/DeadArgElim/deadexternal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/deadexternal.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/deadexternal.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/deadexternal.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt -deadargelim -S < %s | FileCheck %s
+
+define void @test(i32) {
+  ret void
+}
+
+define void @foo() {
+  call void @test(i32 0)
+  ret void
+; CHECK-LABEL: @foo(
+; CHECK: i32 undef
+}
+
+define void @f(i32 %X) {
+entry:
+  tail call void @sideeffect() nounwind
+  ret void
+}
+
+declare void @sideeffect()
+
+define void @g(i32 %n) {
+entry:
+  %add = add nsw i32 %n, 1
+; CHECK: tail call void @f(i32 undef)
+  tail call void @f(i32 %add)
+  ret void
+}
+
+define void @h() {
+entry:
+  %i = alloca i32, align 4
+  store volatile i32 10, i32* %i, align 4
+; CHECK: %tmp = load volatile i32, i32* %i, align 4
+; CHECK-NEXT: call void @f(i32 undef)
+  %tmp = load volatile i32, i32* %i, align 4
+  call void @f(i32 %tmp)
+  ret void
+}
+
+; Check that callers are not transformed for weak definitions.
+define weak i32 @weak_f(i32 %x) nounwind {
+entry:
+  ret i32 0
+}
+define void @weak_f_caller() nounwind {
+entry:
+; CHECK: call i32 @weak_f(i32 10)
+  %call = tail call i32 @weak_f(i32 10)
+  ret void
+}
+
+%swift_error = type opaque
+
+define void @unused_swifterror_arg(%swift_error** swifterror %dead_arg) {
+  tail call void @sideeffect() nounwind
+  ret void
+}
+
+; CHECK-LABEL: @dont_replace_by_undef
+; CHECK-NOT: call void @unused_swifterror_arg({{.*}}undef)
+define void @dont_replace_by_undef() {
+  %error_ptr_ref = alloca swifterror %swift_error*
+  store %swift_error* null, %swift_error** %error_ptr_ref
+  call void @unused_swifterror_arg(%swift_error** %error_ptr_ref)
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/deadretval.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/deadretval.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/deadretval.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/deadretval.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt < %s -deadargelim -S | not grep DEAD
+
+; Dead arg only used by dead retval
+define internal i32 @test(i32 %DEADARG) {
+        ret i32 %DEADARG
+}
+
+define i32 @test2(i32 %A) {
+        %DEAD = call i32 @test( i32 %A )                ; <i32> [#uses=0]
+        ret i32 123
+}
+
+define i32 @test3() {
+        %X = call i32 @test2( i32 3232 )                ; <i32> [#uses=1]
+        %Y = add i32 %X, -123           ; <i32> [#uses=1]
+        ret i32 %Y
+}
+

Added: llvm/trunk/test/Transforms/DeadArgElim/deadretval2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/deadretval2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/deadretval2.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/deadretval2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,59 @@
+; RUN: opt < %s -deadargelim -die -S > %t
+; RUN: cat %t | not grep DEAD
+; RUN: cat %t | grep LIVE | count 4
+
+ at P = external global i32                ; <i32*> [#uses=1]
+
+; Dead arg only used by dead retval
+define internal i32 @test(i32 %DEADARG) {
+        ret i32 %DEADARG
+}
+
+define internal i32 @test2(i32 %DEADARG) {
+        %DEADRETVAL = call i32 @test( i32 %DEADARG )            ; <i32> [#uses=1]
+        ret i32 %DEADRETVAL
+}
+
+define void @test3(i32 %X) {
+        %DEADRETVAL = call i32 @test2( i32 %X )         ; <i32> [#uses=0]
+        ret void
+}
+
+define internal i32 @foo() {
+        %DEAD = load i32, i32* @P            ; <i32> [#uses=1]
+        ret i32 %DEAD
+}
+
+define internal i32 @id(i32 %X) {
+        ret i32 %X
+}
+
+define void @test4() {
+        %DEAD = call i32 @foo( )                ; <i32> [#uses=1]
+        %DEAD2 = call i32 @id( i32 %DEAD )              ; <i32> [#uses=0]
+        ret void
+}
+
+; These test whether returning another function's return value properly marks
+; that other function's return value as live. We do this twice, with the
+; functions in different orders (i.e., first the caller, then the callee, and
+; first the callee, then the caller), since DAE processes functions one by one
+; and handles these cases slightly differently.
+
+define internal i32 @test5() {
+  ret i32 123 
+}
+
+define i32 @test6() {
+  %LIVE = call i32 @test5()
+  ret i32 %LIVE
+}
+
+define i32 @test7() {
+  %LIVE = call i32 @test8()
+  ret i32 %LIVE
+}
+
+define internal i32 @test8() {
+  ret i32 124
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/func_metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/func_metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/func_metadata.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/func_metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt -deadargelim -S < %s | FileCheck %s
+
+; Check that function-level metadata is properly cloned.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at s = common dso_local local_unnamed_addr global i32 0, align 4
+
+define internal i32 @va_func(i32 %num, ...) !prof !28 !PGOFuncName !29 {
+; CHECK: define internal void @va_func(i32 %num) !prof ![[ENTRYCOUNT:[0-9]+]] !PGOFuncName ![[PGOFUNCNAME1:[0-9]+]] {
+entry:
+  %0 = load i32, i32* @s, align 4, !tbaa !31
+  %add = add nsw i32 %0, %num
+  store i32 %add, i32* @s, align 4, !tbaa !31
+  ret i32 0
+}
+
+define internal fastcc i32 @foo() unnamed_addr !prof !28 !PGOFuncName !30 {
+; CHECK: define internal fastcc void @foo() unnamed_addr !prof ![[ENTRYCOUNT:[0-9]+]] !PGOFuncName ![[PGOFUNCNAME2:[0-9]+]] {
+entry:
+  %0 = load i32, i32* @s, align 4, !tbaa !31
+  %add = add nsw i32 %0, 8
+  store i32 %add, i32* @s, align 4, !tbaa !31
+  ret i32 0
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 2}
+!5 = !{!"MaxCount", i64 1}
+!6 = !{!"MaxInternalCount", i64 0}
+!7 = !{!"MaxFunctionCount", i64 1}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14, !15, !16, !17, !17, !18, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27}
+!12 = !{i32 10000, i64 0, i32 0}
+!13 = !{i32 100000, i64 0, i32 0}
+!14 = !{i32 200000, i64 0, i32 0}
+!15 = !{i32 300000, i64 0, i32 0}
+!16 = !{i32 400000, i64 0, i32 0}
+!17 = !{i32 500000, i64 1, i32 2}
+!18 = !{i32 600000, i64 1, i32 2}
+!19 = !{i32 700000, i64 1, i32 2}
+!20 = !{i32 800000, i64 1, i32 2}
+!21 = !{i32 900000, i64 1, i32 2}
+!22 = !{i32 950000, i64 1, i32 2}
+!23 = !{i32 990000, i64 1, i32 2}
+!24 = !{i32 999000, i64 1, i32 2}
+!25 = !{i32 999900, i64 1, i32 2}
+!26 = !{i32 999990, i64 1, i32 2}
+!27 = !{i32 999999, i64 1, i32 2}
+!28 = !{!"function_entry_count", i64 1}
+; CHECK: ![[ENTRYCOUNT]] = !{!"function_entry_count", i64 1}
+!29 = !{!"foo.c:va_func"}
+; CHECK: ![[PGOFUNCNAME1]] = !{!"foo.c:va_func"}
+!30 = !{!"foo.c:foo"}
+; CHECK: ![[PGOFUNCNAME2]] = !{!"foo.c:foo"}
+!31 = !{!32, !32, i64 0}
+!32 = !{!"int", !33, i64 0}
+!33 = !{!"omnipotent char", !34, i64 0}
+!34 = !{!"Simple C/C++ TBAA"}

Added: llvm/trunk/test/Transforms/DeadArgElim/funclet.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/funclet.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/funclet.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/funclet.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt -S -deadargelim < %s | FileCheck %s
+target triple = "x86_64-pc-windows-msvc"
+
+define internal void @callee(i8*) {
+entry:
+  call void @thunk()
+  ret void
+}
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  invoke void @thunk()
+          to label %good1 unwind label %bad1
+
+good1:                                            ; preds = %entry-block
+  ret void
+
+bad1:                                             ; preds = %entry-block
+  %pad1 = cleanuppad within none []
+  call void @callee(i8* null) [ "funclet"(token %pad1) ]
+  cleanupret from %pad1 unwind to caller
+}
+; CHECK-LABEL: define void @test1(
+; CHECK:      %[[pad:.*]] = cleanuppad within none []
+; CHECK-NEXT: call void @callee() [ "funclet"(token %[[pad]]) ]
+
+declare void @thunk()
+
+declare i32 @__CxxFrameHandler3(...)

Added: llvm/trunk/test/Transforms/DeadArgElim/keepalive.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/keepalive.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/keepalive.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/keepalive.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+%Ty = type <{ i32, i32 }>
+
+; Check that the pass doesn't modify anything that doesn't need changing. We
+; feed an unused argument to each function to lure the pass into changing
+; _something_ about the function, and then make sure it doesn't change too much.
+
+; This checks that the return value attributes are not removed
+; CHECK: define internal zeroext i32 @test1() #0
+define internal zeroext i32 @test1(i32 %DEADARG1) nounwind {
+        ret i32 1
+}
+
+; This checks that the returned struct stays packed
+; CHECK-LABEL: define internal <{ i32, i32 }> @test2(
+define internal <{ i32, i32 }> @test2(i32 %DEADARG1) {
+        ret <{ i32, i32 }> <{ i32 1, i32 2 }>
+}
+
+; We use this external function to make sure the return values don't become dead
+declare void @user(i32, <{ i32, i32 }>)
+
+define void @caller() {
+        %B = call i32 @test1(i32 1)
+        %C = call <{ i32, i32 }> @test2(i32 2)
+        call void @user(i32 %B, <{ i32, i32 }> %C)
+        ret void
+}
+
+; We can't remove 'this' here, as that would put argmem in ecx instead of
+; memory.
+define internal x86_thiscallcc i32 @unused_this(i32* %this, i32* inalloca %argmem) {
+	%v = load i32, i32* %argmem
+	ret i32 %v
+}
+; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this(i32* %this, i32* inalloca %argmem)
+
+define i32 @caller2() {
+	%t = alloca i32
+	%m = alloca inalloca i32
+	store i32 42, i32* %m
+	%v = call x86_thiscallcc i32 @unused_this(i32* %t, i32* inalloca %m)
+	ret i32 %v
+}
+
+; CHECK: attributes #0 = { nounwind }

Added: llvm/trunk/test/Transforms/DeadArgElim/linkage.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/linkage.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/linkage.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/linkage.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+; rdar://11546243
+%struct.A = type { i8 }
+
+define available_externally void @_Z17externallyDefinedP1A(%struct.A* %a) {
+entry:
+  call void @_Z3foov()
+  ret void
+}
+
+declare void @_Z3foov()
+
+define void @_Z4testP1A(%struct.A* %a) {
+; CHECK: @_Z4testP1A
+; CHECK: @_Z17externallyDefinedP1A(%struct.A* %a)
+
+entry:
+  call void @_Z17externallyDefinedP1A(%struct.A* %a)
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/multdeadretval.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/multdeadretval.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/multdeadretval.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/multdeadretval.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,68 @@
+; This test sees if return values (and arguments) are properly removed when they
+; are unused. All unused values are typed i16, so we can easily check. We also
+; run instcombine to fold insert/extractvalue chains and we run dce to clean up
+; any remaining dead stuff.
+; RUN: opt < %s -deadargelim -instcombine -dce -S | not grep i16
+
+define internal {i16, i32} @test(i16 %DEADARG) {
+        %A = insertvalue {i16,i32} undef, i16 1, 0
+        %B = insertvalue {i16,i32} %A, i32 1001, 1
+        ret {i16,i32} %B
+}
+
+define internal {i32, i16} @test2() {
+        %DEAD = call i16 @test4()
+        %A = insertvalue {i32,i16} undef, i32 1, 0
+        %B = insertvalue {i32,i16} %A, i16 %DEAD, 1
+        ret {i32,i16} %B
+}
+
+; Dead argument, used to check if the second result of test2 is dead even when
+; it's used as a dead argument
+define internal i32 @test3(i16 %A) {
+        %ret = call {i16, i32} @test( i16 %A )                ; <i32> [#uses=0]
+        %DEAD = extractvalue {i16, i32} %ret, 0
+        %LIVE = extractvalue {i16, i32} %ret, 1
+        ret i32 %LIVE
+}
+
+define internal i16 @test4() {
+        ret i16 0
+}
+
+; Multiple return values, multiple live return values
+define internal {i32, i32, i16} @test5() {
+        %A = insertvalue {i32,i32,i16} undef, i32 1, 0
+        %B = insertvalue {i32,i32,i16} %A, i32 2, 1
+        %C = insertvalue {i32,i32,i16} %B, i16 3, 2
+        ret {i32, i32, i16} %C
+}
+
+; Nested return values
+define internal {{i32}, {i16, i16}} @test6() {
+        %A = insertvalue {{i32}, {i16, i16}} undef, i32 1, 0, 0
+        %B = insertvalue {{i32}, {i16, i16}} %A, i16 2, 1, 0
+        %C = insertvalue {{i32}, {i16, i16}} %B, i16 3, 1, 1
+        ret {{i32}, {i16, i16}} %C
+}
+
+define i32 @main() {
+        %ret = call {i32, i16} @test2()                ; <i32> [#uses=1]
+        %LIVE = extractvalue {i32, i16} %ret, 0
+        %DEAD = extractvalue {i32, i16} %ret, 1
+        %Y = add i32 %LIVE, -123           ; <i32> [#uses=1]
+        %LIVE2 = call i32 @test3(i16 %DEAD)                ; <i32> [#uses=1]
+        %Z = add i32 %LIVE2, %Y           ; <i32> [#uses=1]
+        %ret1 = call { i32, i32, i16 } @test5 ()
+        %LIVE3 = extractvalue { i32, i32, i16} %ret1, 0
+        %LIVE4 = extractvalue { i32, i32, i16} %ret1, 1
+        %DEAD2 = extractvalue { i32, i32, i16} %ret1, 2
+        %V = add i32 %LIVE3, %LIVE4
+        %W = add i32 %Z, %V
+        %ret2 = call { { i32 }, { i16, i16 } } @test6 ()
+        %LIVE5 = extractvalue { { i32 }, { i16, i16 } } %ret2, 0, 0
+        %DEAD3 = extractvalue { { i32 }, { i16, i16 } } %ret2, 1, 0
+        %DEAD4 = extractvalue { { i32 }, { i16, i16 } } %ret2, 1, 1
+        %Q = add i32 %W, %LIVE5
+        ret i32 %Q
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/musttail-caller.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/musttail-caller.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/musttail-caller.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/musttail-caller.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; RUN: opt -deadargelim -S < %s | FileCheck %s
+; PR36441
+; Dead arguments should not be removed in the presence of `musttail` calls.
+
+; CHECK-LABEL: define internal void @test(i32 %a, i32 %b)
+; CHECK: musttail call void @foo(i32 %a, i32 0)
+; FIXME: we should replace those with `undef`s
+define internal void @test(i32 %a, i32 %b) {
+  musttail call void @foo(i32 %a, i32 0)
+  ret void
+}
+
+; CHECK-LABEL: define internal void @foo(i32 %a, i32 %b)
+define internal void @foo(i32 %a, i32 %b) {
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/naked_functions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/naked_functions.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/naked_functions.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/naked_functions.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt -S -deadargelim %s | FileCheck %s
+
+; Don't eliminate dead arguments from naked functions.
+; CHECK: define internal i32 @naked(i32 %x)
+
+define internal i32 @naked(i32 %x) #0 {
+  tail call void asm sideeffect inteldialect "mov eax, [esp + $$4]\0A\09ret", "~{eax},~{dirflag},~{fpsr},~{flags}"()
+  unreachable
+}
+
+
+; Don't eliminate dead varargs from naked functions.
+; CHECK: define internal i32 @naked_va(i32 %x, ...)
+
+define internal i32 @naked_va(i32 %x, ...) #0 {
+  tail call void asm sideeffect inteldialect "mov eax, [esp + $$8]\0A\09ret", "~{eax},~{dirflag},~{fpsr},~{flags}"()
+  unreachable
+}
+
+define i32 @f(i32 %x, i32 %y) {
+  %r = call i32 @naked(i32 %x)
+  %s = call i32 (i32, ...) @naked_va(i32 %x, i32 %r)
+
+; Make sure the arguments are still there: not removed or replaced with undef.
+; CHECK: %r = call i32 @naked(i32 %x)
+; CHECK: %s = call i32 (i32, ...) @naked_va(i32 %x, i32 %r)
+
+  ret i32 %s
+}
+
+attributes #0 = { naked }

Added: llvm/trunk/test/Transforms/DeadArgElim/nonzero-address-spaces.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/nonzero-address-spaces.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/nonzero-address-spaces.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/nonzero-address-spaces.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt -S -deadargelim %s | FileCheck %s
+
+; DeadArgumentElimination should respect the function address space
+; in the data layout.
+
+target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
+
+; CHECK: define internal i32 @foo() addrspace(1)
+define internal i32 @foo(i32 %x) #0 {
+  tail call void asm sideeffect inteldialect "mov eax, [esp + $$4]\0A\09ret", "~{eax},~{dirflag},~{fpsr},~{flags}"()
+  unreachable
+}
+
+define i32 @f(i32 %x, i32 %y) {
+  ; CHECK: %r = call addrspace(1) i32 @foo()
+  %r = call i32 @foo(i32 %x)
+
+  ret i32 %r
+}
+

Added: llvm/trunk/test/Transforms/DeadArgElim/operandbundle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/operandbundle.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/operandbundle.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/operandbundle.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,12 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+define internal void @f(i32 %arg) {
+entry:
+  call void @g() [ "foo"(i32 %arg) ]
+  ret void
+}
+
+; CHECK-LABEL: define internal void @f(
+; CHECK: call void @g() [ "foo"(i32 %arg) ]
+
+declare void @g()

Added: llvm/trunk/test/Transforms/DeadArgElim/returned.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/returned.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/returned.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/returned.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt < %s -passes=deadargelim -S | FileCheck %s
+
+%Ty = type { i32, i32 }
+
+; sanity check that the argument and return value are both dead
+; CHECK-LABEL: define internal void @test1()
+
+define internal %Ty* @test1(%Ty* %this) {
+  ret %Ty* %this
+}
+
+; do not keep alive the return value of a function with a dead 'returned' argument
+; CHECK-LABEL: define internal void @test2()
+
+define internal %Ty* @test2(%Ty* returned %this) {
+  ret %Ty* %this
+}
+
+; dummy to keep 'this' alive
+ at dummy = global %Ty* null 
+
+; sanity check that return value is dead
+; CHECK-LABEL: define internal void @test3(%Ty* %this)
+
+define internal %Ty* @test3(%Ty* %this) {
+  store volatile %Ty* %this, %Ty** @dummy
+  ret %Ty* %this
+}
+
+; keep alive return value of a function if the 'returned' argument is live
+; CHECK-LABEL: define internal %Ty* @test4(%Ty* returned %this)
+
+define internal %Ty* @test4(%Ty* returned %this) {
+  store volatile %Ty* %this, %Ty** @dummy
+  ret %Ty* %this
+}
+
+; don't do this if 'returned' is on the call site...
+; CHECK-LABEL: define internal void @test5(%Ty* %this)
+
+define internal %Ty* @test5(%Ty* %this) {
+  store volatile %Ty* %this, %Ty** @dummy
+  ret %Ty* %this
+}
+
+define %Ty* @caller(%Ty* %this) {
+  %1 = call %Ty* @test1(%Ty* %this)
+  %2 = call %Ty* @test2(%Ty* %this)
+  %3 = call %Ty* @test3(%Ty* %this)
+  %4 = call %Ty* @test4(%Ty* %this)
+; ...instead, drop 'returned' from the call site
+; CHECK: call void @test5(%Ty* %this)
+  %5 = call %Ty* @test5(%Ty* returned %this)
+  ret %Ty* %this
+}

Added: llvm/trunk/test/Transforms/DeadArgElim/variadic_safety.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadArgElim/variadic_safety.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadArgElim/variadic_safety.ll (added)
+++ llvm/trunk/test/Transforms/DeadArgElim/variadic_safety.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+declare void @llvm.va_start(i8*)
+
+define internal i32 @va_func(i32 %a, i32 %b, ...) {
+  %valist = alloca i8
+  call void @llvm.va_start(i8* %valist)
+
+  ret i32 %b
+}
+
+; Function derived from AArch64 ABI, where 8 integer arguments go in
+; registers but the 9th goes on the stack. We really don't want to put
+; just 7 args in registers and then start on the stack since any
+; va_arg implementation already present in va_func won't be expecting
+; it.
+define i32 @call_va(i32 %in) {
+  %stacked = alloca i32
+  store i32 42, i32* %stacked
+  %res = call i32(i32, i32, ...) @va_func(i32 %in, i32 %in, [6 x i32] undef, i32* byval %stacked)
+  ret i32 %res
+; CHECK: call i32 (i32, i32, ...) @va_func(i32 undef, i32 %in, [6 x i32] undef, i32* byval %stacked)
+}
+
+define internal i32 @va_deadret_func(i32 %a, i32 %b, ...) {
+  %valist = alloca i8
+  call void @llvm.va_start(i8* %valist)
+
+  ret i32 %a
+}
+
+define void @call_deadret(i32 %in) {
+  %stacked = alloca i32
+  store i32 42, i32* %stacked
+  call i32 (i32, i32, ...) @va_deadret_func(i32 undef, i32 %in, [6 x i32] undef, i32* byval %stacked)
+  ret void
+; CHECK: call void (i32, i32, ...) @va_deadret_func(i32 undef, i32 undef, [6 x i32] undef, i32* byval %stacked)
+}
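
To restate the constraint described in the va_func comment above at the source level: once a variadic callee's va_start/va_arg lowering has been generated, it assumes the declared split between fixed and variadic arguments, so DeadArgElim leaves the prototype alone and only passes undef for the dead fixed argument at call sites. The following is a minimal C sketch of that split, offered as an illustration only; the names va_func, a and b loosely mirror the test, but the body is not taken from it.

  #include <stdarg.h>
  #include <stdio.h>

  /* 'a' is dead in the body, but it still occupies a fixed-argument slot.  */
  /* Dropping it from the prototype would change which arguments are fixed  */
  /* and which are variadic, and the va_start/va_arg code generated for the */
  /* original prototype would then read the wrong slots, so DAE keeps the   */
  /* slot and merely passes undef into it at call sites.                    */
  static int va_func(int a, int b, ...) {
    va_list ap;
    va_start(ap, b);               /* layout follows the declared prototype */
    int first = va_arg(ap, int);   /* first variadic argument               */
    va_end(ap);
    return b + first;
  }

  int main(void) {
    printf("%d\n", va_func(0, 1, 41));   /* prints 42; 'a' is never read */
    return 0;
  }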

Added: llvm/trunk/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+; PR9561
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin9.8"
+
+ at A = external global [0 x i32]
+
+declare ghccc void @Func2(i32*, i32*, i32*, i32)
+
+define ghccc void @Func1(i32* noalias %Arg1, i32* noalias %Arg2, i32* %Arg3, i32 %Arg4) {
+entry:
+  store i32 add (i32 ptrtoint ([0 x i32]* @A to i32), i32 1), i32* %Arg2
+; CHECK: store i32 add (i32 ptrtoint ([0 x i32]* @A to i32), i32 1), i32* %Arg2
+  %ln2gz = getelementptr i32, i32* %Arg1, i32 14
+  %ln2gA = bitcast i32* %ln2gz to double*
+  %ln2gB = load double, double* %ln2gA
+  %ln2gD = getelementptr i32, i32* %Arg2, i32 -3
+  %ln2gE = bitcast i32* %ln2gD to double*
+  store double %ln2gB, double* %ln2gE
+; CHECK: store double %ln2gB, double* %ln2gE
+  tail call ghccc void @Func2(i32* %Arg1, i32* %Arg2, i32* %Arg3, i32 %Arg4) nounwind
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -dse -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin"
+
+%"class.std::auto_ptr" = type { i32* }
+
+; CHECK-LABEL: @_Z3foov(
+define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) uwtable ssp {
+_ZNSt8auto_ptrIiED1Ev.exit:
+  %temp.lvalue = alloca %"class.std::auto_ptr", align 8
+  call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue)
+  %_M_ptr.i.i = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0
+  %tmp.i.i = load i32*, i32** %_M_ptr.i.i, align 8
+; CHECK-NOT: store i32* null
+  store i32* null, i32** %_M_ptr.i.i, align 8
+  %_M_ptr.i.i4 = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0
+  store i32* %tmp.i.i, i32** %_M_ptr.i.i4, align 8
+; CHECK: ret void
+  ret void
+}
+
+declare void @_Z3barv(%"class.std::auto_ptr"* sret)

Added: llvm/trunk/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,85 @@
+; RUN: opt -dse -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.pair.162 = type { %struct.BasicBlock*, i32, [4 x i8] }
+%struct.BasicBlock = type { %struct.Value, %struct.ilist_node.24, %struct.iplist.22, %struct.Function* }
+%struct.Value = type { i32 (...)**, i8, i8, i16, %struct.Type*, %struct.Use*, %struct.StringMapEntry* }
+%struct.Type = type { %struct.LLVMContext*, i8, [3 x i8], i32, {}* }
+%struct.LLVMContext = type { %struct.LLVMContextImpl* }
+%struct.LLVMContextImpl = type opaque
+%struct.Use = type { %struct.Value*, %struct.Use*, %struct.PointerIntPair }
+%struct.PointerIntPair = type { i64 }
+%struct.StringMapEntry = type opaque
+%struct.ilist_node.24 = type { %struct.ilist_half_node.23, %struct.BasicBlock* }
+%struct.ilist_half_node.23 = type { %struct.BasicBlock* }
+%struct.iplist.22 = type { %struct.ilist_traits.21, %struct.Instruction* }
+%struct.ilist_traits.21 = type { %struct.ilist_half_node.25 }
+%struct.ilist_half_node.25 = type { %struct.Instruction* }
+%struct.Instruction = type { [52 x i8], %struct.ilist_node.26, %struct.BasicBlock*, %struct.DebugLoc }
+%struct.ilist_node.26 = type { %struct.ilist_half_node.25, %struct.Instruction* }
+%struct.DebugLoc = type { i32, i32 }
+%struct.Function = type { %struct.GlobalValue, %struct.ilist_node.14, %struct.iplist.4, %struct.iplist, %struct.ValueSymbolTable*, %struct.AttrListPtr }
+%struct.GlobalValue = type <{ [52 x i8], [4 x i8], %struct.Module*, i8, i16, [5 x i8], %struct.basic_string }>
+%struct.Module = type { %struct.LLVMContext*, %struct.iplist.20, %struct.iplist.16, %struct.iplist.12, %struct.vector.2, %struct.ilist, %struct.basic_string, %struct.ValueSymbolTable*, %struct.OwningPtr, %struct.basic_string, %struct.basic_string, %struct.basic_string, i8* }
+%struct.iplist.20 = type { %struct.ilist_traits.19, %struct.GlobalVariable* }
+%struct.ilist_traits.19 = type { %struct.ilist_node.18 }
+%struct.ilist_node.18 = type { %struct.ilist_half_node.17, %struct.GlobalVariable* }
+%struct.ilist_half_node.17 = type { %struct.GlobalVariable* }
+%struct.GlobalVariable = type { %struct.GlobalValue, %struct.ilist_node.18, i8, [7 x i8] }
+%struct.iplist.16 = type { %struct.ilist_traits.15, %struct.Function* }
+%struct.ilist_traits.15 = type { %struct.ilist_node.14 }
+%struct.ilist_node.14 = type { %struct.ilist_half_node.13, %struct.Function* }
+%struct.ilist_half_node.13 = type { %struct.Function* }
+%struct.iplist.12 = type { %struct.ilist_traits.11, %struct.GlobalAlias* }
+%struct.ilist_traits.11 = type { %struct.ilist_node.10 }
+%struct.ilist_node.10 = type { %struct.ilist_half_node.9, %struct.GlobalAlias* }
+%struct.ilist_half_node.9 = type { %struct.GlobalAlias* }
+%struct.GlobalAlias = type { %struct.GlobalValue, %struct.ilist_node.10 }
+%struct.vector.2 = type { %struct._Vector_base.1 }
+%struct._Vector_base.1 = type { %struct._Vector_impl.0 }
+%struct._Vector_impl.0 = type { %struct.basic_string*, %struct.basic_string*, %struct.basic_string* }
+%struct.basic_string = type { %struct._Alloc_hider }
+%struct._Alloc_hider = type { i8* }
+%struct.ilist = type { %struct.iplist.8 }
+%struct.iplist.8 = type { %struct.ilist_traits.7, %struct.NamedMDNode* }
+%struct.ilist_traits.7 = type { %struct.ilist_node.6 }
+%struct.ilist_node.6 = type { %struct.ilist_half_node.5, %struct.NamedMDNode* }
+%struct.ilist_half_node.5 = type { %struct.NamedMDNode* }
+%struct.NamedMDNode = type { %struct.ilist_node.6, %struct.basic_string, %struct.Module*, i8* }
+%struct.ValueSymbolTable = type opaque
+%struct.OwningPtr = type { %struct.GVMaterializer* }
+%struct.GVMaterializer = type opaque
+%struct.iplist.4 = type { %struct.ilist_traits.3, %struct.BasicBlock* }
+%struct.ilist_traits.3 = type { %struct.ilist_half_node.23 }
+%struct.iplist = type { %struct.ilist_traits, %struct.Argument* }
+%struct.ilist_traits = type { %struct.ilist_half_node }
+%struct.ilist_half_node = type { %struct.Argument* }
+%struct.Argument = type { %struct.Value, %struct.ilist_node, %struct.Function* }
+%struct.ilist_node = type { %struct.ilist_half_node, %struct.Argument* }
+%struct.AttrListPtr = type { %struct.AttributeListImpl* }
+%struct.AttributeListImpl = type opaque
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+; CHECK: _ZSt9iter_swapIPSt4pairIPN4llvm10BasicBlockEjES5_EvT_T0_
+; CHECK: store
+; CHECK: ret void
+define void @_ZSt9iter_swapIPSt4pairIPN4llvm10BasicBlockEjES5_EvT_T0_(%struct.pair.162* %__a, %struct.pair.162* %__b) nounwind uwtable inlinehint {
+entry:
+  %memtmp = alloca %struct.pair.162, align 8
+  %0 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %memtmp, i64 0, i32 0
+  %1 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %__a, i64 0, i32 0
+  %2 = load %struct.BasicBlock*, %struct.BasicBlock** %1, align 8
+  store %struct.BasicBlock* %2, %struct.BasicBlock** %0, align 8
+  %3 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %memtmp, i64 0, i32 1
+  %4 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %__a, i64 0, i32 1
+  %5 = load i32, i32* %4, align 4
+  store i32 %5, i32* %3, align 8
+  %6 = bitcast %struct.pair.162* %__a to i8*
+  %7 = bitcast %struct.pair.162* %__b to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 12, i1 false)
+  %8 = bitcast %struct.pair.162* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 12, i1 false)
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -dse -S -enable-dse-partial-overwrite-tracking | FileCheck %s
+; PR28588
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @_UPT_destroy(i8* nocapture %ptr) local_unnamed_addr #0 {
+entry:
+  %edi = getelementptr inbounds i8, i8* %ptr, i64 8
+
+; CHECK-NOT: tail call void @llvm.memset.p0i8.i64(i8* align 8 %edi, i8 0, i64 176, i1 false)
+; CHECK-NOT: store i32 -1, i32* %addr
+
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 %edi, i8 0, i64 176, i1 false)
+  %format4.i = getelementptr inbounds i8, i8* %ptr, i64 144
+  %addr = bitcast i8* %format4.i to i32*
+  store i32 -1, i32* %addr, align 8
+
+; CHECK: tail call void @free
+  tail call void @free(i8* nonnull %ptr)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }

Added: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,393 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+define void @write4to7(i32* nocapture %p) {
+; CHECK-LABEL: @write4to7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @write4to7_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @write4to7_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
+define void @write0to3(i32* nocapture %p) {
+; CHECK-LABEL: @write0to3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
+  store i32 1, i32* %p, align 4
+  ret void
+}
+
+define void @write0to3_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @write0to3_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
+; CHECK-NEXT:    store atomic i32 1, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  store atomic i32 1, i32* %p unordered, align 4
+  ret void
+}
+
+; Atomicity of the store is weaker than that of the memset
+define void @write0to3_atomic_weaker(i32* nocapture %p) {
+; CHECK-LABEL: @write0to3_atomic_weaker(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
+; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  store i32 1, i32* %p, align 4
+  ret void
+}
+
+define void @write0to7(i32* nocapture %p) {
+; CHECK-LABEL: @write0to7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 8
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
+; CHECK-NEXT:    store i64 1, i64* [[P4]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false)
+  %p4 = bitcast i32* %p to i64*
+  store i64 1, i64* %p4, align 8
+  ret void
+}
+
+; Changing the memset start and length is okay here because the store size
+; is a multiple of the memset element size
+define void @write0to7_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @write0to7_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 8
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
+; CHECK-NEXT:    store atomic i64 1, i64* [[P4]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
+  %p4 = bitcast i32* %p to i64*
+  store atomic i64 1, i64* %p4 unordered, align 8
+  ret void
+}
+
+define void @write0to7_2(i32* nocapture %p) {
+; CHECK-LABEL: @write0to7_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
+; CHECK-NEXT:    store i64 1, i64* [[P4]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
+  %p4 = bitcast i32* %p to i64*
+  store i64 1, i64* %p4, align 8
+  ret void
+}
+
+define void @write0to7_2_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @write0to7_2_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
+; CHECK-NEXT:    store atomic i64 1, i64* [[P4]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  %p4 = bitcast i32* %p to i64*
+  store atomic i64 1, i64* %p4 unordered, align 8
+  ret void
+}
+
+; We do not trim the beginning of the earlier write if that would change the
+; alignment of the start pointer.
+define void @dontwrite0to3_align8(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite0to3_align8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[P3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %p3, i8 0, i64 32, i1 false)
+  store i32 1, i32* %p, align 4
+  ret void
+}
+
+define void @dontwrite0to3_align8_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite0to3_align8_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    store atomic i32 1, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %p3, i8 0, i64 32, i32 4)
+  store atomic i32 1, i32* %p unordered, align 4
+  ret void
+}
+
+define void @dontwrite0to1(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite0to1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
+; CHECK-NEXT:    store i16 1, i16* [[P4]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false)
+  %p4 = bitcast i32* %p to i16*
+  store i16 1, i16* %p4, align 4
+  ret void
+}
+
+define void @dontwrite0to1_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite0to1_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
+; CHECK-NEXT:    store atomic i16 1, i16* [[P4]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
+  %p4 = bitcast i32* %p to i16*
+  store atomic i16 1, i16* %p4 unordered, align 4
+  ret void
+}
+
+define void @dontwrite2to9(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite2to9(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[P4]], i64 1
+; CHECK-NEXT:    [[P5:%.*]] = bitcast i16* [[ARRAYIDX2]] to i64*
+; CHECK-NEXT:    store i64 1, i64* [[P5]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false)
+  %p4 = bitcast i32* %p to i16*
+  %arrayidx2 = getelementptr inbounds i16, i16* %p4, i64 1
+  %p5 = bitcast i16* %arrayidx2 to i64*
+  store i64 1, i64* %p5, align 8
+  ret void
+}
+
+define void @dontwrite2to9_atomic(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite2to9_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[P4]], i64 1
+; CHECK-NEXT:    [[P5:%.*]] = bitcast i16* [[ARRAYIDX2]] to i64*
+; CHECK-NEXT:    store atomic i64 1, i64* [[P5]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
+  %p4 = bitcast i32* %p to i16*
+  %arrayidx2 = getelementptr inbounds i16, i16* %p4, i64 1
+  %p5 = bitcast i16* %arrayidx2 to i64*
+  store atomic i64 1, i64* %p5 unordered, align 8
+  ret void
+}
+
+define void @write8To15AndThen0To7(i64* nocapture %P) {
+; CHECK-LABEL: @write8To15AndThen0To7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
+; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
+; CHECK-NEXT:    store i64 1, i64* [[BASE64_1]]
+; CHECK-NEXT:    store i64 2, i64* [[BASE64_0]]
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i1 false)
+
+  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+  store i64 1, i64* %base64_1
+  store i64 2, i64* %base64_0
+  ret void
+}
+
+define void @write8To15AndThen0To7_atomic(i64* nocapture %P) {
+; CHECK-LABEL: @write8To15AndThen0To7_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8)
+; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
+; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
+; CHECK-NEXT:    store atomic i64 1, i64* [[BASE64_1]] unordered, align 8
+; CHECK-NEXT:    store atomic i64 2, i64* [[BASE64_0]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+  store atomic i64 1, i64* %base64_1 unordered, align 8
+  store atomic i64 2, i64* %base64_0 unordered, align 8
+  ret void
+}
+
+define void @write8To15AndThen0To7_atomic_weaker(i64* nocapture %P) {
+; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8)
+; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
+; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
+; CHECK-NEXT:    store atomic i64 1, i64* [[BASE64_1]] unordered, align 8
+; CHECK-NEXT:    store i64 2, i64* [[BASE64_0]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+  store atomic i64 1, i64* %base64_1 unordered, align 8
+  store i64 2, i64* %base64_0, align 8
+  ret void
+}
+
+define void @write8To15AndThen0To7_atomic_weaker_2(i64* nocapture %P) {
+; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8)
+; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
+; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
+; CHECK-NEXT:    store i64 1, i64* [[BASE64_1]], align 8
+; CHECK-NEXT:    store atomic i64 2, i64* [[BASE64_0]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+  store i64 1, i64* %base64_1, align 8
+  store atomic i64 2, i64* %base64_0 unordered, align 8
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
+

Added: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.vec2 = type { <4 x i32>, <4 x i32> }
+%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
+
+@glob1 = global %struct.vec2 zeroinitializer, align 16
+@glob2 = global %struct.vec2plusi zeroinitializer, align 16
+
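+; The store to element 7 overwrites the last 4 bytes of the 28-byte memset, so DSE
+; trims the memset length from 28 to 24.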
+define void @write24to28(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write24to28(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @write24to28_atomic(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write24to28_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 24, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
+; Atomicity of the store is weaker than the memset
+define void @write24to28_atomic_weaker(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write24to28_atomic_weaker(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 24, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write28to32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @write28to32_atomic(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write28to32_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
+define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @dontwrite28to32memset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 16 [[P3]], i8 0, i64 32, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 16 %p3, i8 0, i64 32, i1 false)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @dontwrite28to32memset_atomic(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @dontwrite28to32memset_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 16 [[P3]], i8 0, i64 32, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p3 = bitcast i32* %p to i8*
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 16 %p3, i8 0, i64 32, i32 4)
+  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 7
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
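+; The store to field 'c' overwrites the trailing i32 of the 36-byte memcpy, so the
+; memcpy is shrunk to 32 bytes.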
+define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write32to36(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i1 false)
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2
+; CHECK-NEXT:    store i32 1, i32* [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2plusi* %p to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i1 false)
+  %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2
+  store i32 1, i32* %c, align 4
+  ret void
+}
+
+define void @write32to36_atomic(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write32to36_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 4)
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2
+; CHECK-NEXT:    store atomic i32 1, i32* [[C]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2plusi* %p to i8*
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4)
+  %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2
+  store atomic i32 1, i32* %c unordered, align 4
+  ret void
+}
+
+; Atomicity of the store is weaker than the memcpy
+define void @write32to36_atomic_weaker(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write32to36_atomic_weaker(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2plusi* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 4)
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2PLUSI:%.*]], %struct.vec2plusi* [[P]], i64 0, i32 2
+; CHECK-NEXT:    store i32 1, i32* [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2plusi* %p to i8*
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 4)
+  %c = getelementptr inbounds %struct.vec2plusi, %struct.vec2plusi* %p, i64 0, i32 2
+  store i32 1, i32* %c, align 4
+  ret void
+}
+
+define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write16to32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 16, i1 false)
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 1
+; CHECK-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false)
+  %c = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 1
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
+  ret void
+}
+
+define void @write16to32_atomic(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @write16to32_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 4)
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 1
+; CHECK-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4)
+  %c = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 1
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
+  ret void
+}
+
+define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @dontwrite28to32memcpy(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 0, i64 7
+; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i1 false)
+  %arrayidx1 = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 0, i64 7
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @dontwrite28to32memcpy_atomic(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK-LABEL: @dontwrite28to32memcpy_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.vec2* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], %struct.vec2* [[P]], i64 0, i32 0, i64 7
+; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = bitcast %struct.vec2* %p to i8*
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 4)
+  %arrayidx1 = getelementptr inbounds %struct.vec2, %struct.vec2* %p, i64 0, i32 0, i64 7
+  store atomic i32 1, i32* %arrayidx1 unordered, align 4
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
+
+%struct.trapframe = type { i64, i64, i64 }
+
+; bugzilla 11455 - make sure negative GEPs don't break this optimization
+define void @cpu_lwp_fork(%struct.trapframe* %md_regs, i64 %pcb_rsp0) nounwind uwtable noinline ssp {
+; CHECK-LABEL: @cpu_lwp_fork(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[PCB_RSP0:%.*]] to %struct.trapframe*
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds [[STRUCT_TRAPFRAME:%.*]], %struct.trapframe* [[TMP0]], i64 -1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %struct.trapframe* [[ADD_PTR]] to i8*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %struct.trapframe* [[MD_REGS:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 24, i1 false)
+; CHECK-NEXT:    [[TF_TRAPNO:%.*]] = getelementptr inbounds [[STRUCT_TRAPFRAME]], %struct.trapframe* [[TMP0]], i64 -1, i32 1
+; CHECK-NEXT:    store i64 3, i64* [[TF_TRAPNO]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = inttoptr i64 %pcb_rsp0 to %struct.trapframe*
+  %add.ptr = getelementptr inbounds %struct.trapframe, %struct.trapframe* %0, i64 -1
+  %1 = bitcast %struct.trapframe* %add.ptr to i8*
+  %2 = bitcast %struct.trapframe* %md_regs to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i1 false)
+  %tf_trapno = getelementptr inbounds %struct.trapframe, %struct.trapframe* %0, i64 -1, i32 1
+  store i64 3, i64* %tf_trapno, align 8
+  ret void
+}
+
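+; The stores to elements 2 and 3 overwrite bytes 16-31, the tail of the 32-byte
+; memset, so the memset is shrunk to its first 16 bytes.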
+define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+; CHECK-LABEL: @write16To23AndThen24To31(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
+; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
+; CHECK-NEXT:    store i64 3, i64* [[BASE64_2]]
+; CHECK-NEXT:    store i64 3, i64* [[BASE64_3]]
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i1 false)
+
+  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+  store i64 3, i64* %base64_2
+  store i64 3, i64* %base64_3
+  ret void
+}
+
+define void @write16To23AndThen24To31_atomic(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+; CHECK-LABEL: @write16To23AndThen24To31_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i32 8)
+; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
+; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
+; CHECK-NEXT:    store atomic i64 3, i64* [[BASE64_2]] unordered, align 8
+; CHECK-NEXT:    store atomic i64 3, i64* [[BASE64_3]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+  store atomic i64 3, i64* %base64_2 unordered, align 8
+  store atomic i64 3, i64* %base64_3 unordered, align 8
+  ret void
+}
+
+define void @write16To23AndThen24To31_atomic_weaker1(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+; CHECK-LABEL: @write16To23AndThen24To31_atomic_weaker1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i32 8)
+; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
+; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
+; CHECK-NEXT:    store i64 3, i64* [[BASE64_2]], align 8
+; CHECK-NEXT:    store atomic i64 3, i64* [[BASE64_3]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+  store i64 3, i64* %base64_2, align 8
+  store atomic i64 3, i64* %base64_3 unordered, align 8
+  ret void
+}
+
+define void @write16To23AndThen24To31_atomic_weaker2(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+; CHECK-LABEL: @write16To23AndThen24To31_atomic_weaker2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
+; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[MYBASE0]], i8 0, i64 16, i32 8)
+; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
+; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
+; CHECK-NEXT:    store atomic i64 3, i64* [[BASE64_2]] unordered, align 8
+; CHECK-NEXT:    store i64 3, i64* [[BASE64_3]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+
+  %base0 = bitcast i64* %P to i8*
+  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
+
+  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+  store atomic i64 3, i64* %base64_2 unordered, align 8
+  store i64 3, i64* %base64_3, align 8
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,87 @@
+; RUN: opt < %s -basicaa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; Ensure that the dead store is deleted in this case.  It is wholly
+; overwritten by the second store.
+define void @test1(i32 *%V) {
+        %V2 = bitcast i32* %V to i8*            ; <i8*> [#uses=1]
+        store i8 0, i8* %V2
+        store i32 1234567, i32* %V
+        ret void
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: store i32 1234567
+}
+
+; Note that we could do better by merging the two stores into one.
+define void @test2(i32* %P) {
+; CHECK-LABEL: @test2(
+  store i32 0, i32* %P
+; CHECK: store i32
+  %Q = bitcast i32* %P to i16*
+  store i16 1, i16* %Q
+; CHECK: store i16
+  ret void
+}
+
+
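+; The i32 load reads part of the value written by the double store, so the store
+; must be kept.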
+define i32 @test3(double %__x) {
+; CHECK-LABEL: @test3(
+; CHECK: store double
+  %__u = alloca { [3 x i32] }
+  %tmp.1 = bitcast { [3 x i32] }* %__u to double*
+  store double %__x, double* %tmp.1
+  %tmp.4 = getelementptr { [3 x i32] }, { [3 x i32] }* %__u, i32 0, i32 0, i32 1
+  %tmp.5 = load i32, i32* %tmp.4
+  %tmp.6 = icmp slt i32 %tmp.5, 0
+  %tmp.7 = zext i1 %tmp.6 to i32
+  ret i32 %tmp.7
+}
+
+; PR6043
+define void @test4(i8* %P) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: store double
+
+  store i8 19, i8* %P  ;; dead
+  %A = getelementptr i8, i8* %P, i32 3
+
+  store i8 42, i8* %A  ;; dead
+
+  %Q = bitcast i8* %P to double*
+  store double 0.0, double* %Q
+  ret void
+}
+
+; PR8657
+declare void @test5a(i32*)
+define void @test5(i32 %i) nounwind ssp {
+  %A = alloca i32
+  %B = bitcast i32* %A to i8*
+  %C = getelementptr i8, i8* %B, i32 %i
+  store i8 10, i8* %C        ;; Dead store to variable index.
+  store i32 20, i32* %A
+
+  call void @test5a(i32* %A)
+  ret void
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: alloca
+; CHECK-NEXT: store i32 20
+; CHECK-NEXT: call void @test5a
+}
+
+declare void @test5a_as1(i32*)
+define void @test5_addrspacecast(i32 %i) nounwind ssp {
+  %A = alloca i32
+  %B = addrspacecast i32* %A to i8 addrspace(1)*
+  %C = getelementptr i8, i8 addrspace(1)* %B, i32 %i
+  store i8 10, i8 addrspace(1)* %C        ;; Dead store to variable index.
+  store i32 20, i32* %A
+
+  call void @test5a(i32* %A)
+  ret void
+; CHECK-LABEL: @test5_addrspacecast(
+; CHECK-NEXT: alloca
+; CHECK-NEXT: store i32 20
+; CHECK-NEXT: call void @test5a
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -dse -S | FileCheck %s
+
+; Both stores must be kept because we cannot tell whether the gather aliases them.
+
+define <4 x i32> @bar(<4 x i32> %arg, i32* %arg1) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    store i32 5, i32* [[ARG1:%.*]]
+; CHECK-NEXT:    [[TMP:%.*]] = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* null, <4 x i32> [[ARG:%.*]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i8 1)
+; CHECK-NEXT:    store i32 10, i32* [[ARG1]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP]]
+;
+bb:
+  store i32 5, i32* %arg1
+  %tmp = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* null, <4 x i32> %arg, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i8 1)
+  store i32 10, i32* %arg1
+  ret <4 x i32> %tmp
+}
+
+declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8)

Added: llvm/trunk/test/Transforms/DeadStoreElimination/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/atomic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/atomic.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/atomic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,132 @@
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; Sanity tests for atomic stores.
+; Note that essentially every transformation DSE performs is legal on atomic ops;
+; the exception is that transformations are not allowed across release-acquire pairs.
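+; In the tests below, stores that are plain or unordered can be removed across
+; unordered and monotonic operations, while stores that are themselves monotonic or
+; stronger are kept conservatively.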
+
+@x = common global i32 0, align 4
+@y = common global i32 0, align 4
+
+declare void @randomop(i32*)
+
+; DSE across unordered store (allowed)
+define void @test1() {
+; CHECK-LABEL: test1
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  store atomic i32 0, i32* @y unordered, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE remove unordered store (allowed)
+define void @test4() {
+; CHECK-LABEL: test4
+; CHECK-NOT: store atomic
+; CHECK: store i32 1
+  store atomic i32 0, i32* @x unordered, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE unordered store overwriting non-atomic store (allowed)
+define void @test5() {
+; CHECK-LABEL: test5
+; CHECK: store atomic i32 1
+  store i32 0, i32* @x
+  store atomic i32 1, i32* @x unordered, align 4
+  ret void
+}
+
+; DSE no-op unordered atomic store (allowed)
+define void @test6() {
+; CHECK-LABEL: test6
+; CHECK-NOT: store
+; CHECK: ret void
+  %x = load atomic i32, i32* @x unordered, align 4
+  store atomic i32 %x, i32* @x unordered, align 4
+  ret void
+}
+
+; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
+; to reason about atomic operations).
+define void @test7() {
+; CHECK-LABEL: test7
+; CHECK: store atomic
+  %a = alloca i32
+  store atomic i32 0, i32* %a seq_cst, align 4
+  ret void
+}
+
+; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
+; to reason about atomic operations).
+define i32 @test8() {
+; CHECK-LABEL: test8
+; CHECK: store
+; CHECK: load atomic
+  %a = alloca i32
+  call void @randomop(i32* %a)
+  store i32 0, i32* %a, align 4
+  %x = load atomic i32, i32* @x seq_cst, align 4
+  ret i32 %x
+}
+
+; DSE across monotonic load (allowed as long as the eliminated store is unordered)
+define i32 @test9() {
+; CHECK-LABEL: test9
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  %x = load atomic i32, i32* @y monotonic, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; DSE across monotonic store (allowed as long as the eliminated store is unordered)
+define void @test10() {
+; CHECK-LABEL: test10
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  store atomic i32 42, i32* @y monotonic, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE across monotonic load (forbidden since the eliminated store is atomic)
+define i32 @test11() {
+; CHECK-LABEL: test11
+; CHECK: store atomic i32 0
+; CHECK: store atomic i32 1
+  store atomic i32 0, i32* @x monotonic, align 4
+  %x = load atomic i32, i32* @y monotonic, align 4
+  store atomic i32 1, i32* @x monotonic, align 4
+  ret i32 %x
+}
+
+; DSE across monotonic store (forbidden since the eliminated store is atomic)
+define void @test12() {
+; CHECK-LABEL: test12
+; CHECK: store atomic i32 0
+; CHECK: store atomic i32 1
+  store atomic i32 0, i32* @x monotonic, align 4
+  store atomic i32 42, i32* @y monotonic, align 4
+  store atomic i32 1, i32* @x monotonic, align 4
+  ret void
+}
+
+; But DSE is not allowed across a release-acquire pair.
+define i32 @test15() {
+; CHECK-LABEL: test15
+; CHECK: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  store atomic i32 0, i32* @y release, align 4
+  %x = load atomic i32, i32* @y acquire, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/calloc-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/calloc-store.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/calloc-store.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/calloc-store.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,65 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+declare noalias i8* @calloc(i64, i64)
+
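+; calloc returns zeroed memory, so a store of zero into it is dead unless the memory
+; may have been written in between (@test4), the store is volatile (@test5), or the
+; stored value may be non-zero (@test6, @test7).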
+define i32* @test1() {
+; CHECK-LABEL: test1
+  %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+  %2 = bitcast i8* %1 to i32*
+  ; This store is dead and should be removed
+  store i32 0, i32* %2, align 4
+; CHECK-NOT: store i32 0, i32* %2, align 4
+  ret i32* %2
+}
+
+define i32* @test2() {
+; CHECK-LABEL: test2
+  %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+  %2 = bitcast i8* %1 to i32*
+  %3 = getelementptr i32, i32* %2, i32 5
+  store i32 0, i32* %3, align 4
+; CHECK-NOT: store i32 0, i32* %2, align 4
+  ret i32* %2
+}
+
+define i32* @test3(i32 *%arg) {
+; CHECK-LABEL: test3
+  store i32 0, i32* %arg, align 4
+; CHECK: store i32 0, i32* %arg, align 4
+  ret i32* %arg
+}
+
+declare void @clobber_memory(i8*)
+define i8* @test4() {
+; CHECK-LABEL: test4
+  %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+  call void @clobber_memory(i8* %1)
+  store i8 0, i8* %1, align 4
+; CHECK: store i8 0, i8* %1, align 4
+  ret i8* %1
+}
+
+define i32* @test5() {
+; CHECK-LABEL: test5
+  %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+  %2 = bitcast i8* %1 to i32*
+  store volatile i32 0, i32* %2, align 4
+; CHECK: store volatile i32 0, i32* %2, align 4
+  ret i32* %2
+}
+
+define i8* @test6() {
+; CHECK-LABEL: test6
+  %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+  store i8 5, i8* %1, align 4
+; CHECK: store i8 5, i8* %1, align 4
+  ret i8* %1
+}
+
+define i8* @test7(i8 %arg) {
+; CHECK-LABEL: test7
+  %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+  store i8 %arg, i8* %1, align 4
+; CHECK: store i8 %arg, i8* %1, align 4
+  ret i8* %1
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,239 @@
+; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+%"struct.std::complex" = type { { float, float } }
+
+define void @_Z4testSt7complexIfE(%"struct.std::complex"* noalias nocapture sret %agg.result, i64 %c.coerce) {
+entry:
+; CHECK-LABEL: @_Z4testSt7complexIfE
+
+  %ref.tmp = alloca i64, align 8
+  %tmpcast = bitcast i64* %ref.tmp to %"struct.std::complex"*
+  %c.sroa.0.0.extract.shift = lshr i64 %c.coerce, 32
+  %c.sroa.0.0.extract.trunc = trunc i64 %c.sroa.0.0.extract.shift to i32
+  %0 = bitcast i32 %c.sroa.0.0.extract.trunc to float
+  %c.sroa.2.0.extract.trunc = trunc i64 %c.coerce to i32
+  %1 = bitcast i32 %c.sroa.2.0.extract.trunc to float
+  call void @_Z3barSt7complexIfE(%"struct.std::complex"* nonnull sret %tmpcast, i64 %c.coerce)
+  %2 = bitcast %"struct.std::complex"* %agg.result to i64*
+  %3 = load i64, i64* %ref.tmp, align 8
+  store i64 %3, i64* %2, align 4
+; CHECK-NOT: store i64
+
+  %_M_value.realp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 0
+  %4 = lshr i64 %3, 32
+  %5 = trunc i64 %4 to i32
+  %6 = bitcast i32 %5 to float
+  %_M_value.imagp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 1
+  %7 = trunc i64 %3 to i32
+  %8 = bitcast i32 %7 to float
+  %mul_ad.i.i = fmul fast float %6, %1
+  %mul_bc.i.i = fmul fast float %8, %0
+  %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
+  %mul_ac.i.i = fmul fast float %6, %0
+  %mul_bd.i.i = fmul fast float %8, %1
+  %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
+  store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
+  store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
+  ret void
+; CHECK: ret void
+}
+
+declare void @_Z3barSt7complexIfE(%"struct.std::complex"* sret, i64)
+
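+; The i32 store below is covered only by the union of the later narrow stores;
+; tracking combined partial overwrites lets DSE remove it, along with the i8 stores
+; that later i16 stores fully cover.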
+define void @test1(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test1
+
+ store i32 5, i32* %ptr
+ %bptr = bitcast i32* %ptr to i8*
+ store i8 7, i8* %bptr
+ %wptr = bitcast i32* %ptr to i16*
+ store i16 -30062, i16* %wptr
+ %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+ store i8 25, i8* %bptr2
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+ store i8 47, i8* %bptr3
+ %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+ %wptrp = bitcast i8* %bptr1 to i16*
+ store i16 2020, i16* %wptrp, align 1
+ ret void
+
+; CHECK-NOT: store i32 5, i32* %ptr
+; CHECK-NOT: store i8 7, i8* %bptr
+; CHECK: store i16 -30062, i16* %wptr
+; CHECK-NOT: store i8 25, i8* %bptr2
+; CHECK: store i8 47, i8* %bptr3
+; CHECK: store i16 2020, i16* %wptrp, align 1
+
+; CHECK: ret void
+}
+
+define void @test2(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test2
+
+  store i32 5, i32* %ptr
+
+  %bptr = bitcast i32* %ptr to i8*
+  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+  %wptr = bitcast i8* %bptr to i16*
+  %wptrm1 = bitcast i8* %bptrm1 to i16*
+  %wptr1 = bitcast i8* %bptr1 to i16*
+  %wptr2 = bitcast i8* %bptr2 to i16*
+  %wptr3 = bitcast i8* %bptr3 to i16*
+
+  store i16 1456, i16* %wptrm1, align 1
+  store i16 1346, i16* %wptr, align 1
+  store i16 1756, i16* %wptr1, align 1
+  store i16 1126, i16* %wptr2, align 1
+  store i16 5656, i16* %wptr3, align 1
+
+; CHECK-NOT: store i32 5, i32* %ptr
+
+; CHECK: store i16 1456, i16* %wptrm1, align 1
+; CHECK: store i16 1346, i16* %wptr, align 1
+; CHECK: store i16 1756, i16* %wptr1, align 1
+; CHECK: store i16 1126, i16* %wptr2, align 1
+; CHECK: store i16 5656, i16* %wptr3, align 1
+
+  ret void
+
+; CHECK: ret void
+}
+
+define signext i8 @test3(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test3
+
+  store i32 5, i32* %ptr
+
+  %bptr = bitcast i32* %ptr to i8*
+  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+  %wptr = bitcast i8* %bptr to i16*
+  %wptrm1 = bitcast i8* %bptrm1 to i16*
+  %wptr1 = bitcast i8* %bptr1 to i16*
+  %wptr2 = bitcast i8* %bptr2 to i16*
+  %wptr3 = bitcast i8* %bptr3 to i16*
+
+  %v = load i8, i8* %bptr, align 1
+  store i16 1456, i16* %wptrm1, align 1
+  store i16 1346, i16* %wptr, align 1
+  store i16 1756, i16* %wptr1, align 1
+  store i16 1126, i16* %wptr2, align 1
+  store i16 5656, i16* %wptr3, align 1
+
+; CHECK: store i32 5, i32* %ptr
+
+  ret i8 %v
+
+; CHECK: ret i8 %v
+}
+
+%struct.foostruct = type {
+i32 (i8*, i8**, i32, i8, i8*)*,
+i32 (i8*, i8**, i32, i8, i8*)*,
+i32 (i8*, i8**, i32, i8, i8*)*,
+i32 (i8*, i8**, i32, i8, i8*)*,
+void (i8*, i32, i32)*
+}
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+declare void @goFunc(%struct.foostruct*)
+declare i32 @fa(i8*, i8**, i32, i8, i8*)
+
+define void @test4()  {
+entry:
+; CHECK-LABEL: @test4
+
+  %bang = alloca %struct.foostruct, align 8
+  %v1 = bitcast %struct.foostruct* %bang to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %v1, i8 0, i64 40, i1 false)
+  %v2 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 0
+  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v2, align 8
+  %v3 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 1
+  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v3, align 8
+  %v4 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 2
+  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v4, align 8
+  %v5 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 3
+  store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v5, align 8
+  %v6 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 4
+  store void (i8*, i32, i32)* null, void (i8*, i32, i32)** %v6, align 8
+  call void @goFunc(%struct.foostruct* %bang)
+  ret void
+
+; CHECK-NOT: memset
+; CHECK: ret void
+}
+
+define signext i8 @test5(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test5
+
+  store i32 0, i32* %ptr
+
+  %bptr = bitcast i32* %ptr to i8*
+  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+  %wptr = bitcast i8* %bptr to i16*
+  %wptr1 = bitcast i8* %bptr1 to i16*
+  %wptr2 = bitcast i8* %bptr2 to i16*
+
+  store i16 65535, i16* %wptr2, align 1
+  store i16 1456, i16* %wptr1, align 1
+  store i16 1346, i16* %wptr, align 1
+
+; CHECK-NOT: store i32 0, i32* %ptr
+
+  ret i8 0
+}
+
+define signext i8 @test6(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test6
+
+  store i32 0, i32* %ptr
+
+  %bptr = bitcast i32* %ptr to i16*
+  %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0
+  %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1
+
+  store i16 1456, i16* %bptr2, align 1
+  store i16 65535, i16* %bptr1, align 1
+
+; CHECK-NOT: store i32 0, i32* %ptr
+
+  ret i8 0
+}
+
+define signext i8 @test7(i64 *%ptr) {
+entry:
+; CHECK-LABEL: @test7
+
+  store i64 0, i64* %ptr
+
+  %bptr = bitcast i64* %ptr to i16*
+  %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0
+  %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1
+  %bptr3 = getelementptr inbounds i16, i16* %bptr, i64 2
+  %bptr4 = getelementptr inbounds i16, i16* %bptr, i64 3
+
+  store i16 1346, i16* %bptr1, align 1
+  store i16 1756, i16* %bptr3, align 1
+  store i16 1456, i16* %bptr2, align 1
+  store i16 5656, i16* %bptr4, align 1
+
+; CHECK-NOT: store i64 0, i64* %ptr
+
+  ret i8 0
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/const-pointers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/const-pointers.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/const-pointers.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/const-pointers.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%t = type { i32 }
+
+@g = global i32 42
+
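+; In @test1 the intervening load is through a constant integer address that cannot
+; alias the noalias argument, so the first store is dead; in @test4 the plain pointer
+; argument might alias @g, so both stores must stay.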
+define void @test1(%t* noalias %pp) {
+  %p = getelementptr inbounds %t, %t* %pp, i32 0, i32 0
+
+  store i32 1, i32* %p; <-- This is dead
+  %x = load i32, i32* inttoptr (i32 12345 to i32*)
+  store i32 %x, i32* %p
+  ret void
+; CHECK-LABEL: define void @test1(
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+define void @test3() {
+  store i32 1, i32* @g; <-- This is dead.
+  store i32 42, i32* @g
+  ret void
+; CHECK-LABEL: define void @test3(
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+define void @test4(i32* %p) {
+  store i32 1, i32* %p
+  %x = load i32, i32* @g; <-- %p and @g could alias
+  store i32 %x, i32* %p
+  ret void
+; CHECK-LABEL: define void @test4(
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/crash.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,74 @@
+; RUN: opt < %s -basicaa -dse -S
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+@g80 = external global i8                         ; <i8*> [#uses=3]
+
+declare signext i8 @foo(i8 signext, i8 signext) nounwind readnone ssp
+
+declare i32 @func68(i32) nounwind readonly ssp
+
+; PR4815
+define void @test1(i32 %int32p54) noreturn nounwind ssp {
+entry:
+  br label %bb
+
+bb:                                               ; preds = %bb, %entry
+  %storemerge = phi i8 [ %2, %bb ], [ 1, %entry ] ; <i8> [#uses=1]
+  store i8 %storemerge, i8* @g80
+  %0 = tail call i32 @func68(i32 1) nounwind ssp  ; <i32> [#uses=1]
+  %1 = trunc i32 %0 to i8                         ; <i8> [#uses=1]
+  store i8 %1, i8* @g80, align 1
+  store i8 undef, i8* @g80, align 1
+  %2 = tail call signext i8 @foo(i8 signext undef, i8 signext 1) nounwind ; <i8> [#uses=1]
+  br label %bb
+}
+
+define fastcc i32 @test2() nounwind ssp {
+bb14:                                             ; preds = %bb4
+  %0 = bitcast i8* undef to i8**                  ; <i8**> [#uses=1]
+  %1 = getelementptr inbounds i8*, i8** %0, i64 undef  ; <i8**> [#uses=1]
+  %2 = bitcast i8** %1 to i16*                    ; <i16*> [#uses=2]
+  %3 = getelementptr inbounds i16, i16* %2, i64 undef  ; <i16*> [#uses=1]
+  %4 = bitcast i16* %3 to i8*                     ; <i8*> [#uses=1]
+  %5 = getelementptr inbounds i8, i8* %4, i64 undef   ; <i8*> [#uses=1]
+  %6 = getelementptr inbounds i16, i16* %2, i64 undef  ; <i16*> [#uses=1]
+  store i16 undef, i16* %6, align 2
+  %7 = getelementptr inbounds i8, i8* %5, i64 undef   ; <i8*> [#uses=1]
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* undef, i64 undef, i1 false)
+  unreachable
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+
+; rdar://7635088
+define i32 @test3() {
+entry:
+  ret i32 0
+  
+dead:
+  %P2 = getelementptr i32, i32 *%P2, i32 52
+  %Q2 = getelementptr i32, i32 *%Q2, i32 52
+  store i32 4, i32* %P2
+  store i32 4, i32* %Q2
+  br label %dead
+}
+
+
+; PR3141
+%struct.ada__tags__dispatch_table = type { [1 x i32] }
+%struct.f393a00_1__object = type { %struct.ada__tags__dispatch_table*, i8 }
+%struct.f393a00_2__windmill = type { %struct.f393a00_1__object, i16 }
+
+define void @test4(%struct.f393a00_2__windmill* %a, %struct.f393a00_2__windmill* %b) {
+entry:
+	%t = alloca %struct.f393a00_2__windmill		; <%struct.f393a00_2__windmill*> [#uses=1]
+	%0 = getelementptr %struct.f393a00_2__windmill, %struct.f393a00_2__windmill* %t, i32 0, i32 0, i32 0		; <%struct.ada__tags__dispatch_table**> [#uses=1]
+	%1 = load %struct.ada__tags__dispatch_table*, %struct.ada__tags__dispatch_table** null, align 4		; <%struct.ada__tags__dispatch_table*> [#uses=1]
+	%2 = load %struct.ada__tags__dispatch_table*, %struct.ada__tags__dispatch_table** %0, align 8		; <%struct.ada__tags__dispatch_table*> [#uses=1]
+	store %struct.ada__tags__dispatch_table* %2, %struct.ada__tags__dispatch_table** null, align 4
+	store %struct.ada__tags__dispatch_table* %1, %struct.ada__tags__dispatch_table** null, align 4
+	ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/cs-cs-aliasing.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,74 @@
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.basic_string = type { %"class.__gnu_cxx::__versa_string" }
+%"class.__gnu_cxx::__versa_string" = type { %"class.__gnu_cxx::__sso_string_base" }
+%"class.__gnu_cxx::__sso_string_base" = type { %"struct.__gnu_cxx::__vstring_utility<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider", i64, %union.anon }
+%"struct.__gnu_cxx::__vstring_utility<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+%union.anon = type { i64, [8 x i8] }
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0
+
+; Function Attrs: noinline nounwind readonly uwtable
+declare zeroext i1 @callee_takes_string(%class.basic_string* nonnull) #1 align 2
+
+; Function Attrs: nounwind uwtable
+define weak_odr zeroext i1 @test() #2 align 2 {
+
+; CHECK-LABEL: @test
+
+bb:
+  %tmp = alloca %class.basic_string, align 8
+  %tmp1 = alloca %class.basic_string, align 8
+  %tmp3 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp, i64 0, i32 0, i32 0, i32 2
+  %tmp4 = bitcast %union.anon* %tmp3 to i8*
+  %tmp5 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp, i64 0, i32 0, i32 0, i32 0, i32 0
+  %tmp6 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp, i64 0, i32 0, i32 0, i32 1
+  %tmp7 = getelementptr inbounds i8, i8* %tmp4, i64 1
+  %tmp8 = bitcast %class.basic_string* %tmp to i8*
+  %tmp9 = bitcast i64 0 to i64
+  %tmp10 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp1, i64 0, i32 0, i32 0, i32 2
+  %tmp11 = bitcast %union.anon* %tmp10 to i8*
+  %tmp12 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp1, i64 0, i32 0, i32 0, i32 0, i32 0
+  %tmp13 = getelementptr inbounds %class.basic_string, %class.basic_string* %tmp1, i64 0, i32 0, i32 0, i32 1
+  %tmp14 = getelementptr inbounds i8, i8* %tmp11, i64 1
+  %tmp15 = bitcast %class.basic_string* %tmp1 to i8*
+  br label %_ZN12basic_stringIcSt11char_traitsIcESaIcEEC2EPKcRKS2_.exit
+
+_ZN12basic_stringIcSt11char_traitsIcESaIcEEC2EPKcRKS2_.exit: ; preds = %bb
+  store i8* %tmp4, i8** %tmp5, align 8
+  store i8 62, i8* %tmp4, align 8
+  store i64 1, i64* %tmp6, align 8
+  store i8 0, i8* %tmp7, align 1
+  %tmp16 = call zeroext i1 @callee_takes_string(%class.basic_string* nonnull %tmp)
+  br label %_ZN9__gnu_cxx17__sso_string_baseIcSt11char_traitsIcESaIcEED2Ev.exit3
+
+_ZN9__gnu_cxx17__sso_string_baseIcSt11char_traitsIcESaIcEED2Ev.exit3: ; preds = %_ZN12basic_stringIcSt11char_traitsIcESaIcEEC2EPKcRKS2_.exit
+
+; CHECK: _ZN9__gnu_cxx17__sso_string_baseIcSt11char_traitsIcESaIcEED2Ev.exit3:
+
+; The following can be read through the call %tmp17:
+  store i8* %tmp11, i8** %tmp12, align 8
+  store i8 125, i8* %tmp11, align 8
+  store i64 1, i64* %tmp13, align 8
+  store i8 0, i8* %tmp14, align 1
+
+; CHECK: store i8* %tmp11, i8** %tmp12, align 8
+; CHECK: store i8 125, i8* %tmp11, align 8
+; CHECK: store i64 1, i64* %tmp13, align 8
+; CHECK: store i8 0, i8* %tmp14, align 1
+
+  %tmp17 = call zeroext i1 @callee_takes_string(%class.basic_string* nonnull %tmp1)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %tmp11, i8 -51, i64 16, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* align 8 %tmp15, i8 -51, i64 32, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* align 8 %tmp4, i8 -51, i64 16, i1 false) #0
+  call void @llvm.memset.p0i8.i64(i8* align 8 %tmp8, i8 -51, i64 32, i1 false) #0
+  ret i1 %tmp17
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { noinline nounwind readonly uwtable }
+attributes #2 = { nounwind uwtable }
+

Added: llvm/trunk/test/Transforms/DeadStoreElimination/debuginfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/debuginfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/debuginfo.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/debuginfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt < %s -debugify -basicaa -dse -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+declare noalias i8* @malloc(i32)
+
+declare void @test_f()
+
+define i32* @test_salvage(i32 %arg) {
+; Check that all four original local variables have their values preserved.
+; CHECK-LABEL: @test_salvage(
+; CHECK-NEXT: malloc
+; CHECK-NEXT: @llvm.dbg.value(metadata i8* %p, metadata ![[p:.*]], metadata !DIExpression())
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: @llvm.dbg.value(metadata i32* %P, metadata ![[P:.*]], metadata !DIExpression())
+; CHECK-NEXT: @llvm.dbg.value(metadata i32 %arg, metadata ![[DEAD:.*]], metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value))
+; CHECK-NEXT: call void @test_f()
+; CHECK-NEXT: store i32 0, i32* %P
+
+  %p = tail call i8* @malloc(i32 4)
+  %P = bitcast i8* %p to i32*
+  %DEAD = add i32 %arg, 1
+  store i32 %DEAD, i32* %P
+  call void @test_f()
+  store i32 0, i32* %P
+  ret i32* %P
+}
+
+; CHECK: ![[p]] = !DILocalVariable(name: "1"
+; CHECK: ![[P]] = !DILocalVariable(name: "2"
+; CHECK: ![[DEAD]] = !DILocalVariable(name: "3"

Added: llvm/trunk/test/Transforms/DeadStoreElimination/dominate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/dominate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/dominate.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/dominate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt -dse -disable-output < %s
+; test that we don't crash
+declare void @bar()
+
+define void @foo() {
+bb1:
+  %memtmp3.i = alloca [21 x i8], align 1
+  %0 = getelementptr inbounds [21 x i8], [21 x i8]* %memtmp3.i, i64 0, i64 0
+  br label %bb3
+
+bb2:
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0)
+  br label %bb3
+
+bb3:
+  call void @bar()
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0)
+  br label %bb4
+
+bb4:
+  ret void
+
+}
+
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind

Added: llvm/trunk/test/Transforms/DeadStoreElimination/fence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/fence.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/fence.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/fence.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,96 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+; We conservatively choose to prevent dead store elimination
+; across release or stronger fences.  It's not required
+; (since there must still be a race on %addr.i), but
+; it is conservatively correct.  A legal optimization
+; could hoist the second store above the fence, and then
+; DSE one of them.
+define void @test1(i32* %addr.i) {
+; CHECK-LABEL: @test1
+; CHECK: store i32 5
+; CHECK: fence
+; CHECK: store i32 5
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence release
+  store i32 5, i32* %addr.i, align 4
+  ret void
+}
+
+; Same as previous, but with different values.  If we ever optimize
+; this more aggressively, this allows us to check that the correct
+; store is retained (the 'i32 1' store in this case).
+define void @test1b(i32* %addr.i) {
+; CHECK-LABEL: @test1b
+; CHECK: store i32 42
+; CHECK: fence release
+; CHECK: store i32 1
+; CHECK: ret
+  store i32 42, i32* %addr.i, align 4
+  fence release
+  store i32 1, i32* %addr.i, align 4
+  ret void
+}
+
+; We *could* DSE across this fence, but don't.  No other thread can
+; observe the order of the acquire fence and the store.
+define void @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence acquire
+  store i32 5, i32* %addr.i, align 4
+  ret void
+}
+
+; We DSE stack-allocated and byval locations in the presence of fences.
+; A fence does not make an otherwise thread-local store visible.
+; Right now, DSE in the presence of a fence is only done in end blocks (blocks with no successors),
+; but the same logic applies to other basic blocks as well.
+; The store to %addr.i can be removed since %addr.i is a byval argument.
+define void @test3(i32* byval %addr.i) {
+; CHECK-LABEL: @test3
+; CHECK-NOT: store
+; CHECK: fence
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence release
+  ret void
+}
+
+declare void @foo(i8* nocapture %p)
+
+declare noalias i8* @malloc(i32)
+
+; DSE of stores to locations allocated through library calls.
+define void @test_nocapture() {
+; CHECK-LABEL: @test_nocapture
+; CHECK: malloc
+; CHECK: foo
+; CHECK-NOT: store
+; CHECK: fence
+  %m  =  call i8* @malloc(i32 24)
+  call void @foo(i8* %m)
+  store i8 4, i8* %m
+  fence release
+  ret void
+}
+
+
+; This is a full fence, but it does not make a thread-local store visible.
+; We can DSE the store in the presence of the fence.
+define void @fence_seq_cst() {
+; CHECK-LABEL: @fence_seq_cst
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: ret void
+  %P1 = alloca i32
+  store i32 0, i32* %P1, align 4
+  fence seq_cst
+  store i32 4, i32* %P1, align 4
+  ret void
+}
+
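The fence tests above correspond to a source-level pattern like the following. This is a hedged C++ sketch (function and variable names are illustrative, not taken from the tests) of why DSE stays conservative across a release fence even though deleting one of the identical stores would only be observable under a data race:

  #include <atomic>

  int shared;  // assumed to be reachable from another thread

  // Analogue of @test1 in fence.ll: two identical stores separated by a
  // release fence. DSE keeps both; a legal but more aggressive transform
  // could hoist the second store above the fence and then delete one.
  void analogue_of_test1() {
    shared = 5;
    std::atomic_thread_fence(std::memory_order_release);
    shared = 5;  // currently not treated as making the first store dead
  }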

Added: llvm/trunk/test/Transforms/DeadStoreElimination/free.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/free.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/free.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/free.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,70 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare void @free(i8* nocapture)
+declare noalias i8* @malloc(i64)
+
+; CHECK-LABEL: @test(
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: @free
+; CHECK-NEXT: ret void
+define void @test(i32* %Q, i32* %P) {
+        %DEAD = load i32, i32* %Q            ; <i32> [#uses=1]
+        store i32 %DEAD, i32* %P
+        %1 = bitcast i32* %P to i8*
+        tail call void @free(i8* %1) nounwind
+        ret void
+}
+
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: @free
+; CHECK-NEXT: ret void
+define void @test2({i32, i32}* %P) {
+	%Q = getelementptr {i32, i32}, {i32, i32} *%P, i32 0, i32 1
+	store i32 4, i32* %Q
+        %1 = bitcast {i32, i32}* %P to i8*
+        tail call void @free(i8* %1) nounwind
+	ret void
+}
+
+; CHECK-LABEL: @test3(
+; CHECK-NOT: store
+; CHECK: ret void
+define void @test3() {
+  %m = call i8* @malloc(i64 24)
+  store i8 0, i8* %m
+  %m1 = getelementptr i8, i8* %m, i64 1
+  store i8 1, i8* %m1
+  call void @free(i8* %m) nounwind
+  ret void
+}
+
+; PR11240
+; CHECK-LABEL: @test4(
+; CHECK-NOT: store
+; CHECK: ret void
+define void @test4(i1 %x) nounwind {
+entry:
+  %alloc1 = tail call noalias i8* @malloc(i64 4) nounwind
+  br i1 %x, label %skipinit1, label %init1
+
+init1:
+  store i8 1, i8* %alloc1
+  br label %skipinit1
+
+skipinit1:
+  tail call void @free(i8* %alloc1) nounwind
+  ret void
+}
+
+; CHECK-LABEL: @test5(
+define void @test5() {
+  br label %bb
+
+bb:
+  tail call void @free(i8* undef) nounwind
+  br label %bb
+}
+
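A hedged C++ analogue of @test in free.ll above (names are illustrative): a store whose value is never read before the memory is released is dead, so DSE can drop it and keep only the call to free.

  #include <cstdlib>

  // The write through 'p' is never observed before the memory is freed,
  // so the store is removable; only the free() call remains.
  void analogue_of_free_test(int *q, int *p) {
    *p = *q;       // dead store
    std::free(p);  // assumes p points to malloc'ed memory, as in the test
  }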

Added: llvm/trunk/test/Transforms/DeadStoreElimination/inst-limits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/inst-limits.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/inst-limits.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/inst-limits.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,261 @@
+; RUN: opt -S -dse < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; If there are two stores to the same location, DSE should be able to remove
+; the first store if the two stores are separated by no more than 98
+; instructions. The existence of debug intrinsics between the stores should
+; not affect this instruction limit.
+
+ at x = global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @test_within_limit() !dbg !4 {
+entry:
+  ; The first store; later there is a second store to the same location,
+  ; so this store should be optimized away by DSE.
+  ; CHECK-NOT: store i32 1, i32* @x, align 4
+  store i32 1, i32* @x, align 4
+
+  ; Insert 98 dummy instructions between the two stores
+  %0 = bitcast i32 0 to i32
+  %1 = bitcast i32 0 to i32
+  %2 = bitcast i32 0 to i32
+  %3 = bitcast i32 0 to i32
+  %4 = bitcast i32 0 to i32
+  %5 = bitcast i32 0 to i32
+  %6 = bitcast i32 0 to i32
+  %7 = bitcast i32 0 to i32
+  %8 = bitcast i32 0 to i32
+  %9 = bitcast i32 0 to i32
+  %10 = bitcast i32 0 to i32
+  %11 = bitcast i32 0 to i32
+  %12 = bitcast i32 0 to i32
+  %13 = bitcast i32 0 to i32
+  %14 = bitcast i32 0 to i32
+  %15 = bitcast i32 0 to i32
+  %16 = bitcast i32 0 to i32
+  %17 = bitcast i32 0 to i32
+  %18 = bitcast i32 0 to i32
+  %19 = bitcast i32 0 to i32
+  %20 = bitcast i32 0 to i32
+  %21 = bitcast i32 0 to i32
+  %22 = bitcast i32 0 to i32
+  %23 = bitcast i32 0 to i32
+  %24 = bitcast i32 0 to i32
+  %25 = bitcast i32 0 to i32
+  %26 = bitcast i32 0 to i32
+  %27 = bitcast i32 0 to i32
+  %28 = bitcast i32 0 to i32
+  %29 = bitcast i32 0 to i32
+  %30 = bitcast i32 0 to i32
+  %31 = bitcast i32 0 to i32
+  %32 = bitcast i32 0 to i32
+  %33 = bitcast i32 0 to i32
+  %34 = bitcast i32 0 to i32
+  %35 = bitcast i32 0 to i32
+  %36 = bitcast i32 0 to i32
+  %37 = bitcast i32 0 to i32
+  %38 = bitcast i32 0 to i32
+  %39 = bitcast i32 0 to i32
+  %40 = bitcast i32 0 to i32
+  %41 = bitcast i32 0 to i32
+  %42 = bitcast i32 0 to i32
+  %43 = bitcast i32 0 to i32
+  %44 = bitcast i32 0 to i32
+  %45 = bitcast i32 0 to i32
+  %46 = bitcast i32 0 to i32
+  %47 = bitcast i32 0 to i32
+  %48 = bitcast i32 0 to i32
+  %49 = bitcast i32 0 to i32
+  %50 = bitcast i32 0 to i32
+  %51 = bitcast i32 0 to i32
+  %52 = bitcast i32 0 to i32
+  %53 = bitcast i32 0 to i32
+  %54 = bitcast i32 0 to i32
+  %55 = bitcast i32 0 to i32
+  %56 = bitcast i32 0 to i32
+  %57 = bitcast i32 0 to i32
+  %58 = bitcast i32 0 to i32
+  %59 = bitcast i32 0 to i32
+  %60 = bitcast i32 0 to i32
+  %61 = bitcast i32 0 to i32
+  %62 = bitcast i32 0 to i32
+  %63 = bitcast i32 0 to i32
+  %64 = bitcast i32 0 to i32
+  %65 = bitcast i32 0 to i32
+  %66 = bitcast i32 0 to i32
+  %67 = bitcast i32 0 to i32
+  %68 = bitcast i32 0 to i32
+  %69 = bitcast i32 0 to i32
+  %70 = bitcast i32 0 to i32
+  %71 = bitcast i32 0 to i32
+  %72 = bitcast i32 0 to i32
+  %73 = bitcast i32 0 to i32
+  %74 = bitcast i32 0 to i32
+  %75 = bitcast i32 0 to i32
+  %76 = bitcast i32 0 to i32
+  %77 = bitcast i32 0 to i32
+  %78 = bitcast i32 0 to i32
+  %79 = bitcast i32 0 to i32
+  %80 = bitcast i32 0 to i32
+  %81 = bitcast i32 0 to i32
+  %82 = bitcast i32 0 to i32
+  %83 = bitcast i32 0 to i32
+  %84 = bitcast i32 0 to i32
+  %85 = bitcast i32 0 to i32
+  %86 = bitcast i32 0 to i32
+  %87 = bitcast i32 0 to i32
+  %88 = bitcast i32 0 to i32
+  %89 = bitcast i32 0 to i32
+  %90 = bitcast i32 0 to i32
+  %91 = bitcast i32 0 to i32
+  %92 = bitcast i32 0 to i32
+  %93 = bitcast i32 0 to i32
+  %94 = bitcast i32 0 to i32
+  %95 = bitcast i32 0 to i32
+  %96 = bitcast i32 0 to i32
+  %97 = bitcast i32 0 to i32
+
+  ; Insert a meaningless dbg.value intrinsic; it should not
+  ; affect DSE in any way.
+  call void @llvm.dbg.value(metadata i32 undef, metadata !10, metadata !DIExpression()), !dbg !DILocation(scope: !4)
+
+  ; CHECK:  store i32 -1, i32* @x, align 4
+  store i32 -1, i32* @x, align 4
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+define i32 @test_outside_limit() {
+entry:
+  ; The first store; later there is a second store to the same location
+  ; CHECK: store i32 1, i32* @x, align 4
+  store i32 1, i32* @x, align 4
+
+  ; Insert 99 dummy instructions between the two stores; this is
+  ; one instruction too many for DSE to take place.
+  %0 = bitcast i32 0 to i32
+  %1 = bitcast i32 0 to i32
+  %2 = bitcast i32 0 to i32
+  %3 = bitcast i32 0 to i32
+  %4 = bitcast i32 0 to i32
+  %5 = bitcast i32 0 to i32
+  %6 = bitcast i32 0 to i32
+  %7 = bitcast i32 0 to i32
+  %8 = bitcast i32 0 to i32
+  %9 = bitcast i32 0 to i32
+  %10 = bitcast i32 0 to i32
+  %11 = bitcast i32 0 to i32
+  %12 = bitcast i32 0 to i32
+  %13 = bitcast i32 0 to i32
+  %14 = bitcast i32 0 to i32
+  %15 = bitcast i32 0 to i32
+  %16 = bitcast i32 0 to i32
+  %17 = bitcast i32 0 to i32
+  %18 = bitcast i32 0 to i32
+  %19 = bitcast i32 0 to i32
+  %20 = bitcast i32 0 to i32
+  %21 = bitcast i32 0 to i32
+  %22 = bitcast i32 0 to i32
+  %23 = bitcast i32 0 to i32
+  %24 = bitcast i32 0 to i32
+  %25 = bitcast i32 0 to i32
+  %26 = bitcast i32 0 to i32
+  %27 = bitcast i32 0 to i32
+  %28 = bitcast i32 0 to i32
+  %29 = bitcast i32 0 to i32
+  %30 = bitcast i32 0 to i32
+  %31 = bitcast i32 0 to i32
+  %32 = bitcast i32 0 to i32
+  %33 = bitcast i32 0 to i32
+  %34 = bitcast i32 0 to i32
+  %35 = bitcast i32 0 to i32
+  %36 = bitcast i32 0 to i32
+  %37 = bitcast i32 0 to i32
+  %38 = bitcast i32 0 to i32
+  %39 = bitcast i32 0 to i32
+  %40 = bitcast i32 0 to i32
+  %41 = bitcast i32 0 to i32
+  %42 = bitcast i32 0 to i32
+  %43 = bitcast i32 0 to i32
+  %44 = bitcast i32 0 to i32
+  %45 = bitcast i32 0 to i32
+  %46 = bitcast i32 0 to i32
+  %47 = bitcast i32 0 to i32
+  %48 = bitcast i32 0 to i32
+  %49 = bitcast i32 0 to i32
+  %50 = bitcast i32 0 to i32
+  %51 = bitcast i32 0 to i32
+  %52 = bitcast i32 0 to i32
+  %53 = bitcast i32 0 to i32
+  %54 = bitcast i32 0 to i32
+  %55 = bitcast i32 0 to i32
+  %56 = bitcast i32 0 to i32
+  %57 = bitcast i32 0 to i32
+  %58 = bitcast i32 0 to i32
+  %59 = bitcast i32 0 to i32
+  %60 = bitcast i32 0 to i32
+  %61 = bitcast i32 0 to i32
+  %62 = bitcast i32 0 to i32
+  %63 = bitcast i32 0 to i32
+  %64 = bitcast i32 0 to i32
+  %65 = bitcast i32 0 to i32
+  %66 = bitcast i32 0 to i32
+  %67 = bitcast i32 0 to i32
+  %68 = bitcast i32 0 to i32
+  %69 = bitcast i32 0 to i32
+  %70 = bitcast i32 0 to i32
+  %71 = bitcast i32 0 to i32
+  %72 = bitcast i32 0 to i32
+  %73 = bitcast i32 0 to i32
+  %74 = bitcast i32 0 to i32
+  %75 = bitcast i32 0 to i32
+  %76 = bitcast i32 0 to i32
+  %77 = bitcast i32 0 to i32
+  %78 = bitcast i32 0 to i32
+  %79 = bitcast i32 0 to i32
+  %80 = bitcast i32 0 to i32
+  %81 = bitcast i32 0 to i32
+  %82 = bitcast i32 0 to i32
+  %83 = bitcast i32 0 to i32
+  %84 = bitcast i32 0 to i32
+  %85 = bitcast i32 0 to i32
+  %86 = bitcast i32 0 to i32
+  %87 = bitcast i32 0 to i32
+  %88 = bitcast i32 0 to i32
+  %89 = bitcast i32 0 to i32
+  %90 = bitcast i32 0 to i32
+  %91 = bitcast i32 0 to i32
+  %92 = bitcast i32 0 to i32
+  %93 = bitcast i32 0 to i32
+  %94 = bitcast i32 0 to i32
+  %95 = bitcast i32 0 to i32
+  %96 = bitcast i32 0 to i32
+  %97 = bitcast i32 0 to i32
+  %98 = bitcast i32 0 to i32
+
+  ; CHECK:  store i32 -1, i32* @x, align 4
+  store i32 -1, i32* @x, align 4
+  ret i32 0
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "test.c", directory: "/home/tmp")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test_within_limit", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 4, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "test.c", directory: "/home/tmp")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DILocalVariable(name: "x", scope: !4, type: !8)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32* undef}
+
+!13 = !{i32 1, !"Debug Info Version", i32 3}
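The two functions above exercise a scan limit. The following is a hedged, self-contained C++ sketch of the idea (a model of the behavior the test checks, not the actual DSE implementation): when searching backwards from the later store, debug intrinsics do not consume the instruction budget, so 98 real instructions plus a dbg.value still allow the earlier store to be removed, while 99 real instructions do not.

  #include <cstdio>
  #include <vector>

  struct Inst { bool isDebugIntrinsic; };

  // Returns true if the earlier store is close enough to be eliminated.
  static bool withinScanLimit(const std::vector<Inst> &between, unsigned limit) {
    unsigned count = 0;
    for (const Inst &inst : between) {
      if (inst.isDebugIntrinsic)
        continue;              // debug intrinsics are free
      if (++count > limit)
        return false;          // too many real instructions in between
    }
    return true;
  }

  int main() {
    std::vector<Inst> gap(98, Inst{false});  // 98 dummy instructions
    gap.push_back(Inst{true});               // plus one dbg.value
    std::printf("within limit: %d\n", withinScanLimit(gap, 98));  // prints 1
    gap.push_back(Inst{false});              // a 99th real instruction
    std::printf("within limit: %d\n", withinScanLimit(gap, 98));  // prints 0
  }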

Added: llvm/trunk/test/Transforms/DeadStoreElimination/int_sideeffect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/int_sideeffect.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/int_sideeffect.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/int_sideeffect.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt -S < %s -dse | FileCheck %s
+
+declare void @llvm.sideeffect()
+
+; Dead store elimination across a @llvm.sideeffect.
+
+; CHECK-LABEL: dse
+; CHECK: store
+; CHECK-NOT: store
+define void @dse(float* %p) {
+    store float 0.0, float* %p
+    call void @llvm.sideeffect()
+    store float 0.0, float* %p
+    ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/invariant.start.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/invariant.start.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/invariant.start.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/invariant.start.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; Test to make sure llvm.invariant.start calls are not treated as clobbers.
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+
+; We cannot remove the store of 1 to %p.
+; FIXME: By the semantics of invariant.start, the store of 3 to %p is unreachable.
+define void @test(i8 *%p) {
+  store i8 1, i8* %p, align 4
+  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p)
+  store i8 3, i8* %p, align 4
+  ret void
+; CHECK-LABEL: @test(
+; CHECK-NEXT: store i8 1, i8* %p, align 4
+; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p)
+; CHECK-NEXT: store i8 3, i8* %p, align 4
+; CHECK-NEXT: ret void
+}
+
+; FIXME: We should be able to remove the first store to p, even though p and q
+; may alias.
+define void @test2(i8* %p, i8* %q) {
+  store i8 1, i8* %p, align 4
+  store i8 2, i8* %q, align 4
+  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q)
+  store i8 3, i8* %p, align 4
+  ret void
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: store i8 1, i8* %p, align 4
+; CHECK-NEXT: store i8 2, i8* %q, align 4
+; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q)
+; CHECK-NEXT: store i8 3, i8* %p, align 4
+; CHECK-NEXT: ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/launder.invariant.group.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/launder.invariant.group.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/launder.invariant.group.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/launder.invariant.group.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,65 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+; CHECK-LABEL: void @skipBarrier(i8* %ptr)
+define void @skipBarrier(i8* %ptr) {
+; CHECK-NOT: store i8 42
+  store i8 42, i8* %ptr
+; CHECK: %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
+  %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
+; CHECK: store i8 43
+  store i8 43, i8* %ptr2
+  ret void
+}
+
+; CHECK-LABEL: void @skip2Barriers(i8* %ptr)
+define void @skip2Barriers(i8* %ptr) {
+; CHECK-NOT: store i8 42
+  store i8 42, i8* %ptr
+; CHECK: %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
+  %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
+; CHECK-NOT: store i8 43
+  store i8 43, i8* %ptr2
+  %ptr3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr2)
+  %ptr4 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr3)
+
+; CHECK: store i8 44
+  store i8 44, i8* %ptr4
+  ret void
+}
+
+; CHECK-LABEL: void @skip3Barriers(i8* %ptr)
+define void @skip3Barriers(i8* %ptr) {
+; CHECK-NOT: store i8 42
+  store i8 42, i8* %ptr
+; CHECK: %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr)
+  %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr)
+; CHECK-NOT: store i8 43
+  store i8 43, i8* %ptr2
+  %ptr3 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr2)
+  %ptr4 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr3)
+
+; CHECK: store i8 44
+  store i8 44, i8* %ptr4
+  ret void
+}
+
+; CHECK-LABEL: void @skip4Barriers(i8* %ptr)
+define void @skip4Barriers(i8* %ptr) {
+; CHECK-NOT: store i8 42
+  store i8 42, i8* %ptr
+; CHECK: %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr)
+  %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr)
+; CHECK-NOT: store i8 43
+  store i8 43, i8* %ptr2
+  %ptr3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr2)
+  %ptr4 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr3)
+  %ptr5 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr3)
+
+; CHECK: store i8 44
+  store i8 44, i8* %ptr5
+  ret void
+}
+
+
+declare i8* @llvm.launder.invariant.group.p0i8(i8*)
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)
\ No newline at end of file

Added: llvm/trunk/test/Transforms/DeadStoreElimination/libcalls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/libcalls.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/libcalls.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/libcalls.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,70 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+declare i8* @strcpy(i8* %dest, i8* %src) nounwind
+define void @test1(i8* %src) {
+; CHECK-LABEL: @test1(
+  %B = alloca [16 x i8]
+  %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strcpy
+  %call = call i8* @strcpy(i8* %dest, i8* %src)
+; CHECK: ret void
+  ret void
+}
+
+declare i8* @strncpy(i8* %dest, i8* %src, i32 %n) nounwind
+define void @test2(i8* %src) {
+; CHECK-LABEL: @test2(
+  %B = alloca [16 x i8]
+  %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strncpy
+  %call = call i8* @strncpy(i8* %dest, i8* %src, i32 12)
+; CHECK: ret void
+  ret void
+}
+
+declare i8* @strcat(i8* %dest, i8* %src) nounwind
+define void @test3(i8* %src) {
+; CHECK-LABEL: @test3(
+  %B = alloca [16 x i8]
+  %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strcat
+  %call = call i8* @strcat(i8* %dest, i8* %src)
+; CHECK: ret void
+  ret void
+}
+
+declare i8* @strncat(i8* %dest, i8* %src, i32 %n) nounwind
+define void @test4(i8* %src) {
+; CHECK-LABEL: @test4(
+  %B = alloca [16 x i8]
+  %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strncat
+  %call = call i8* @strncat(i8* %dest, i8* %src, i32 12)
+; CHECK: ret void
+  ret void
+}
+
+define void @test5(i8* nocapture %src) {
+; CHECK-LABEL: @test5(
+  %dest = alloca [100 x i8], align 16
+  %arraydecay = getelementptr inbounds [100 x i8], [100 x i8]* %dest, i64 0, i64 0
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %src)
+; CHECK: %call = call i8* @strcpy
+  %arrayidx = getelementptr inbounds i8, i8* %call, i64 10
+  store i8 97, i8* %arrayidx, align 1
+  ret void
+}
+
+declare void @user(i8* %p)
+define void @test6(i8* %src) {
+; CHECK-LABEL: @test6(
+  %B = alloca [16 x i8]
+  %dest = getelementptr inbounds [16 x i8], [16 x i8]* %B, i64 0, i64 0
+; CHECK: @strcpy
+  %call = call i8* @strcpy(i8* %dest, i8* %src)
+; CHECK: @user
+  call void @user(i8* %dest)
+; CHECK: ret void
+  ret void
+}
+
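A hedged C++ analogue of @test1 through @test4 in libcalls.ll above (names are illustrative): a string library call that only writes into a local buffer which is never read again behaves like a dead store, so DSE removes the call; @test5 and @test6 cover the cases where the result is still used and the call must stay.

  #include <cstring>

  // The buffer never escapes and is never read, so the strcpy call is
  // removable (assuming src fits in the buffer, as in the test).
  void analogue_of_libcalls_test1(const char *src) {
    char buf[16];
    std::strcpy(buf, src);  // dead: no later read of buf
  }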

Added: llvm/trunk/test/Transforms/DeadStoreElimination/lifetime.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/lifetime.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/lifetime.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/lifetime.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+  %A = alloca i8
+
+  store i8 0, i8* %A  ;; Written to by memset
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %A)
+; CHECK: lifetime.end
+
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i1 false)
+; CHECK-NOT: memset
+
+  ret void
+; CHECK: ret void
+}
+
+define void @test2(i32* %P) {
+; CHECK: test2
+  %Q = getelementptr i32, i32* %P, i32 1
+  %R = bitcast i32* %Q to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %R)
+; CHECK: lifetime.start
+  store i32 0, i32* %Q  ;; This store is dead.
+; CHECK-NOT: store
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %R)
+; CHECK: lifetime.end
+  ret void
+}
+
+

Added: llvm/trunk/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/mda-with-dbg-values.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,72 @@
+; RUN: opt -S -dse -memdep-block-scan-limit=3 < %s | FileCheck %s
+; RUN: opt -S -strip-debug -dse -memdep-block-scan-limit=3 < %s | FileCheck %s
+
+; Test case to check that the memory dependency analysis gets the same
+; result even if we have a dbg.value between the memcpy and
+; the store. The memory dependency is then used by DSE to remove the store.
+
+; We use -memdep-block-scan-limit=3 to be able to create a small test case.
+; Without it, we would need to squeeze in 100 instructions since the default
+; limit is 100.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at g = common global [1 x i8] zeroinitializer, align 1, !dbg !0
+
+; Function Attrs: noinline nounwind uwtable
+define void @foo() #0 !dbg !14 {
+entry:
+  %i = alloca i8, align 1
+  store i8 1, i8* %i, align 1, !dbg !19
+  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
+  %0 = bitcast [1 x i8]* @g to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %i, i8* %0, i64 1, i1 false), !dbg !20
+  br label %bb2
+
+bb2:                                              ; preds = %0
+  ret void, !dbg !21
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #2
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { argmemonly nounwind }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10, !11, !12}
+!llvm.ident = !{!13}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 3, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 6.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
+!3 = !DIFile(filename: "foo.c", directory: "/bar")
+!4 = !{}
+!5 = !{!0}
+!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 8, elements: !8)
+!7 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!8 = !{!9}
+!9 = !DISubrange(count: 1)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{i32 1, !"wchar_size", i32 4}
+!13 = !{!"clang version 6.0.0"}
+!14 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 5, type: !15, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: false, unit: !2, retainedNodes: !4)
+!15 = !DISubroutineType(types: !16)
+!16 = !{null}
+!17 = !DILocalVariable(name: "i", scope: !14, file: !3, line: 7, type: !7)
+!18 = !DILocation(line: 7, column: 10, scope: !14)
+!19 = !DILocation(line: 8, column: 7, scope: !14)
+!20 = !DILocation(line: 9, column: 5, scope: !14)
+!21 = !DILocation(line: 10, column: 1, scope: !14)
+
+; Check that the store is removed and that the memcpy is still there
+; CHECK-LABEL: foo
+; CHECK-NOT:   store i8
+; CHECK:       call void @llvm.memcpy
+; CHECK:       ret void

Added: llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -dse < %s | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret void
+;
+  %A = alloca i8
+  %B = alloca i8
+
+  store i8 0, i8* %A  ;; Written to by memcpy
+
+  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false)
+
+  ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret void
+;
+  %A = alloca i8
+  %B = alloca i8
+
+  store i8 0, i8* %A  ;; Written to by memmove
+
+  call void @llvm.memmove.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false)
+
+  ret void
+}
+
+define void @test3() {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    ret void
+;
+  %A = alloca i8
+  %B = alloca i8
+
+  store i8 0, i8* %A  ;; Written to by memset
+
+  call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i1 false)
+
+  ret void
+}
+
+declare void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* nocapture, i16* nocapture, i16, i32) nounwind
+declare void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* nocapture, i16* nocapture, i16, i32) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* nocapture, i8, i16, i32) nounwind
+
+
+define void @test4() {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    ret void
+;
+  %A = alloca i16, i16 1024, align 2
+  %B = alloca i16, i16 1024, align 2
+
+  store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memcpy
+  store atomic i16 0, i16* %B unordered, align 2 ;; Read by memcpy
+
+  call void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 %A, i16* align 2 %B, i16 1024, i32 2)
+
+  ret void
+}
+
+define void @test5() {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    ret void
+;
+  %A = alloca i16, i16 1024, align 2
+  %B = alloca i16, i16 1024, align 2
+
+  store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memmove
+  store atomic i16 0, i16* %B unordered, align 2 ;; Read by memmove
+
+  call void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 %A, i16* align 2 %B, i16 1024, i32 2)
+
+  ret void
+}
+
+define void @test6() {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    ret void
+;
+  %A = alloca i16, i16 1024, align 2
+  %B = alloca i16, i16 1024, align 2
+
+  store atomic i16 0, i16* %A unordered, align 2 ;; Written to by memset
+
+  call void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* align 2 %A, i8 0, i16 1024, i32 2)
+
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/memset-missing-debugloc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,89 @@
+; Test that the getelementptr generated when the DSE pass determines that
+; a memset can be shortened has the debugloc carried over from the memset.
+
+; RUN: opt -S -march=native -dse < %s | FileCheck %s
+; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg
+; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]]
+; CHECK: ![[DBG]] = !DILocation(line: 2,
+
+; The test IR is generated by running:
+;
+; clang Debugify_Dead_Store_Elimination.cpp -Wno-c++11-narrowing -S \
+;   -emit-llvm -O0 -w -Xclang -disable-O0-optnone -march=native -fdeclspec \
+;   --target=x86_64-gnu-linux-unknown -Werror=unreachable-code -o -
+;
+; Where Debugify_Dead_Store_Elimination.cpp contains:
+;
+; int a() {
+;   long b[]{2, 2, 2, 2, 0};
+;   if (a())
+;     ;
+; }
+
+
+define dso_local i32 @_Z1av() !dbg !7 {
+entry:
+  %retval = alloca i32, align 4
+  %b = alloca [5 x i64], align 16
+  call void @llvm.dbg.declare(metadata [5 x i64]* %b, metadata !11, metadata !DIExpression()), !dbg !16
+  %0 = bitcast [5 x i64]* %b to i8*, !dbg !16
+  call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 40, i1 false), !dbg !16
+  %1 = bitcast i8* %0 to [5 x i64]*, !dbg !16
+  %2 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 0, !dbg !16
+  store i64 2, i64* %2, align 16, !dbg !16
+  %3 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 1, !dbg !16
+  store i64 2, i64* %3, align 8, !dbg !16
+  %4 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 2, !dbg !16
+  store i64 2, i64* %4, align 16, !dbg !16
+  %5 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 3, !dbg !16
+  store i64 2, i64* %5, align 8, !dbg !16
+  %call = call i32 @_Z1av(), !dbg !17
+  %tobool = icmp ne i32 %call, 0, !dbg !17
+  br i1 %tobool, label %if.then, label %if.end, !dbg !19
+
+if.then:                                          ; preds = %entry
+  br label %if.end, !dbg !19
+
+if.end:                                           ; preds = %if.then, %entry
+  call void @llvm.trap(), !dbg !20
+  unreachable, !dbg !20
+
+return:                                           ; No predecessors!
+  %6 = load i32, i32* %retval, align 4, !dbg !21
+  ret i32 %6, !dbg !21
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+
+; Function Attrs: cold noreturn nounwind
+declare void @llvm.trap()
+
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git eb1a156d7f7ba56ea8f9a26da36e6a93d1e98bda)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "Debugify_Dead_Store_Elimination.cpp", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git eb1a156d7f7ba56ea8f9a26da36e6a93d1e98bda)"}
+!7 = distinct !DISubprogram(name: "a", linkageName: "_Z1av", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "b", scope: !7, file: !1, line: 2, type: !12)
+!12 = !DICompositeType(tag: DW_TAG_array_type, baseType: !13, size: 320, elements: !14)
+!13 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed)
+!14 = !{!15}
+!15 = !DISubrange(count: 5)
+!16 = !DILocation(line: 2, column: 8, scope: !7)
+!17 = !DILocation(line: 3, column: 7, scope: !18)
+!18 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 7)
+!19 = !DILocation(line: 3, column: 7, scope: !7)
+!20 = !DILocation(line: 3, column: 9, scope: !18)
+!21 = !DILocation(line: 5, column: 1, scope: !7)

Added: llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores-big-endian.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
+
+define void @byte_by_byte_replacement(i32 *%ptr) {
+; CHECK-LABEL: @byte_by_byte_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 151653132, i32* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  ;; This store's value should be modified, since it is better to use one
+  ;; larger store than several smaller ones.
+  ;; The store will turn into 0x090A0B0C == 151653132.
+  store i32 305419896, i32* %ptr  ; 0x12345678
+  %bptr = bitcast i32* %ptr to i8*
+  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+  ;; We should be able to merge these four stores with the i32 above
+  ; value (and bytes) stored before  ; 0x12345678
+  store i8 9, i8* %bptr              ;   09
+  store i8 10, i8* %bptr1            ;     0A
+  store i8 11, i8* %bptr2            ;       0B
+  store i8 12, i8* %bptr3            ;         0C
+  ;                                    0x090A0B0C
+
+  ret void
+}
+
+define void @word_replacement(i64 *%ptr) {
+; CHECK-LABEL: @word_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 72638273700655232, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 72623859790382856, i64* %ptr  ; 0x0102030405060708
+
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
+
+  ;; We should be able to merge these two stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x0102030405060708
+  store i16  4128, i16* %wptr1       ;       1020
+  store i16 28800, i16* %wptr3       ;               7080
+  ;                                    0x0102102005067080
+
+  ret void
+}
+
+
+define void @differently_sized_replacements(i64 *%ptr) {
+; CHECK-LABEL: @differently_sized_replacements(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 289077004501059343, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr6    ;               07
+  store i16     1541, i16* %wptr2    ;           0605
+  store i32 67305985, i32* %dptr     ;   04030201
+  ;                                    0x040302010605070f
+  ret void
+}
+
+
+define void @multiple_replacements_to_same_byte(i64 *%ptr) {
+; CHECK-LABEL: @multiple_replacements_to_same_byte(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 289077004602248719, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (and bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr3    ;         07
+  store i16     1541, i16* %wptr1    ;       0605
+  store i32 67305985, i32* %dptr     ;   04030201
+  ;                                    0x040302010c0d0e0f
+  ret void
+}
+
+define void @merged_merges(i64 *%ptr) {
+; CHECK-LABEL: @merged_merges(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 289081428418563599, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i32 67305985, i32* %dptr     ;   04030201
+  store i16     1541, i16* %wptr1    ;       0605
+  store i8         7, i8*  %bptr3    ;         07
+  ;                                    0x040306070c0d0e0f
+  ret void
+}
+
+define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) {
+; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
+; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
+; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
+; CHECK-NEXT:    [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32*
+; CHECK-NEXT:    [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64*
+; CHECK-NEXT:    store i32 1234, i32* [[DPTR]], align 1
+; CHECK-NEXT:    store i64 5678, i64* [[QPTR]], align 1
+; CHECK-NEXT:    ret i8 0
+;
+entry:
+
+  store i64 0, i64* %ptr
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %dptr = bitcast i8* %bptrm1 to i32*
+  %qptr = bitcast i8* %bptr3 to i64*
+
+  store i32 1234, i32* %dptr, align 1
+  store i64 5678, i64* %qptr, align 1
+
+  ret i8 0
+}
+
+;; Test case from PR31777
+%union.U = type { i64 }
+
+define void @foo(%union.U* nocapture %u) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store i64 11821949021847552, i64* [[I]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0
+  store i64 0, i64* %i, align 8
+  %s = bitcast %union.U* %u to i16*
+  store i16 42, i16* %s, align 8
+  ret void
+}
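The merged constants in this file and in the little-endian merge-stores.ll variant below can be reproduced with a short worked example. This is a hedged C++ sketch of the arithmetic only (not of the pass itself): each small store overwrites one byte of the earlier i32 store, and which bits of the integer a given byte offset lands in depends on endianness.

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t value = 0x12345678;               // the original i32 store
    const uint8_t bytes[4] = {9, 10, 11, 12};  // byte stores at offsets 0..3

    uint32_t bigEndian = value, littleEndian = value;
    for (int off = 0; off < 4; ++off) {
      int beShift = 8 * (3 - off);  // offset 0 is the most significant byte
      int leShift = 8 * off;        // offset 0 is the least significant byte
      bigEndian    = (bigEndian    & ~(0xFFu << beShift)) | (uint32_t)bytes[off] << beShift;
      littleEndian = (littleEndian & ~(0xFFu << leShift)) | (uint32_t)bytes[off] << leShift;
    }
    std::printf("big-endian:    0x%08X == %u\n", bigEndian, bigEndian);        // 0x090A0B0C == 151653132
    std::printf("little-endian: 0x%08X == %u\n", littleEndian, littleEndian);  // 0x0C0B0A09 == 202050057
  }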

Added: llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/merge-stores.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,237 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+
+define void @byte_by_byte_replacement(i32 *%ptr) {
+; CHECK-LABEL: @byte_by_byte_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 202050057, i32* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  ;; This store's value should be modified, since it is better to use one
+  ;; larger store than several smaller ones.
+  ;; The store will turn into 0x0C0B0A09 == 202050057.
+  store i32 305419896, i32* %ptr  ; 0x12345678
+  %bptr = bitcast i32* %ptr to i8*
+  %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+  %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+  ;; We should be able to merge these four stores with the i32 above
+  ; value (and bytes) stored before  ; 0x12345678
+  store i8 9, i8* %bptr              ;         09
+  store i8 10, i8* %bptr1            ;       0A
+  store i8 11, i8* %bptr2            ;     0B
+  store i8 12, i8* %bptr3            ;   0C
+  ;                                    0x0C0B0A09
+  ret void
+}
+
+define void @word_replacement(i64 *%ptr) {
+; CHECK-LABEL: @word_replacement(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 8106482645252179720, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 72623859790382856, i64* %ptr  ; 0x0102030405060708
+
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
+
+  ;; We should be able to merge these two stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x0102030405060708
+  store i16  4128, i16* %wptr1       ;           1020
+  store i16 28800, i16* %wptr3       ;   7080
+  ;                                    0x7080030410200708
+  ret void
+}
+
+
+define void @differently_sized_replacements(i64 *%ptr) {
+; CHECK-LABEL: @differently_sized_replacements(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 578437695752307201, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr6    ;     07
+  store i16     1541, i16* %wptr2    ;       0605
+  store i32 67305985, i32* %dptr     ;           04030201
+  ;                                    0x0807060504030201
+  ret void
+}
+
+
+define void @multiple_replacements_to_same_byte(i64 *%ptr) {
+; CHECK-LABEL: @multiple_replacements_to_same_byte(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 579005069522043393, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i8         7, i8*  %bptr3    ;           07
+  store i16     1541, i16* %wptr1    ;           0605
+  store i32 67305985, i32* %dptr     ;           04030201
+  ;                                    0x08090a0b04030201
+  ret void
+}
+
+define void @merged_merges(i64 *%ptr) {
+; CHECK-LABEL: @merged_merges(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 579005069572506113, i64* [[PTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  store i64 579005069656919567, i64* %ptr  ; 0x08090a0b0c0d0e0f
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %wptr = bitcast i64* %ptr to i16*
+  %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+  %dptr = bitcast i64* %ptr to i32*
+
+  ;; We should be able to merge all these stores with the i64 one above
+  ; value (not bytes) stored before  ; 0x08090a0b0c0d0e0f
+  store i32 67305985, i32* %dptr     ;           04030201
+  store i16     1541, i16* %wptr1    ;           0605
+  store i8         7, i8*  %bptr3    ;           07
+  ;                                    0x08090a0b07050201
+  ret void
+}
+
+define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) {
+; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
+; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
+; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
+; CHECK-NEXT:    [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32*
+; CHECK-NEXT:    [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64*
+; CHECK-NEXT:    store i32 1234, i32* [[DPTR]], align 1
+; CHECK-NEXT:    store i64 5678, i64* [[QPTR]], align 1
+; CHECK-NEXT:    ret i8 0
+;
+entry:
+
+  ; Also check that alias.scope metadata doesn't get dropped
+  store i64 0, i64* %ptr, !alias.scope !32
+
+  %bptr = bitcast i64* %ptr to i8*
+  %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+  %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+  %dptr = bitcast i8* %bptrm1 to i32*
+  %qptr = bitcast i8* %bptr3 to i64*
+
+  store i32 1234, i32* %dptr, align 1
+  store i64 5678, i64* %qptr, align 1
+
+  ret i8 0
+}
+
+;; Test case from PR31777
+%union.U = type { i64 }
+
+define void @foo(%union.U* nocapture %u) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store i64 42, i64* [[I]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0
+  store i64 0, i64* %i, align 8, !dbg !22, !tbaa !26, !noalias !30, !nontemporal !29
+  %s = bitcast %union.U* %u to i16*
+  store i16 42, i16* %s, align 8
+  ret void
+}
+
+; Don't crash by operating on stale data if we merge (kill) the last 2 stores.
+
+define void @PR34074(i32* %x, i64* %y) {
+; CHECK-LABEL: @PR34074(
+; CHECK-NEXT:    store i64 42, i64* %y
+; CHECK-NEXT:    store i32 4, i32* %x
+; CHECK-NEXT:    ret void
+;
+  store i64 42, i64* %y          ; independent store
+  %xbc = bitcast i32* %x to i8*
+  store i32 0, i32* %x           ; big store of constant
+  store i8 4, i8* %xbc           ; small store with mergeable constant
+  ret void
+}
+
+; We can't eliminate the last store because P and Q may alias.
+
+define void @PR36129(i32* %P, i32* %Q) {
+; CHECK-LABEL: @PR36129(
+; CHECK-NEXT:    store i32 1, i32* [[P:%.*]]
+; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
+; CHECK-NEXT:    store i32 2, i32* [[Q:%.*]]
+; CHECK-NEXT:    store i8 3, i8* [[P2]]
+; CHECK-NEXT:    ret void
+;
+  store i32 1, i32* %P
+  %P2 = bitcast i32* %P to i8*
+  store i32 2, i32* %Q
+  store i8 3, i8* %P2
+  ret void
+}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 306512)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "me.cpp", directory: "/compiler-explorer")
+!2 = !{}
+!7 = distinct !DISubprogram(name: "foo", linkageName: "foo(U*)", scope: !1, file: !1, line: 9, type: !8, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !20)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "U", file: !1, line: 4, size: 64, elements: !12, identifier: "typeinfo name for U")
+!12 = !{!13, !17}
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !11, file: !1, line: 5, baseType: !14, size: 64)
+!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", file: !15, line: 55, baseType: !16)
+!15 = !DIFile(filename: "/usr/include/stdint.h", directory: "/compiler-explorer")
+!16 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned)
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "s", scope: !11, file: !1, line: 6, baseType: !18, size: 16)
+!18 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", file: !15, line: 49, baseType: !19)
+!19 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned)
+!20 = !{!21}
+!21 = !DILocalVariable(name: "u", arg: 1, scope: !7, file: !1, line: 9, type: !10)
+!22 = !DILocation(line: 10, column: 8, scope: !7)
+
+!26 = !{!27, !27, i64 0}
+!27 = !{!"omnipotent char", !28, i64 0}
+!28 = !{!"Simple C++ TBAA"}
+
+!29 = !{i32 1}
+
+; Domains and scopes which might alias
+!30 = !{!30}
+!31 = !{!31, !30}
+
+!32 = !{!32}
+!33 = !{!33, !32}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/no-targetdata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/no-targetdata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/no-targetdata.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/no-targetdata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define void @fn(i8* nocapture %buf) #0 {
+entry:
+
+; We would not eliminate the first memcpy with data layout, and we should not
+; eliminate it without data layout.
+; CHECK-LABEL: @fn
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK: ret void
+
+  %arrayidx = getelementptr i8, i8* %buf, i64 18
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arrayidx, i8* %buf, i64 18, i1 false)
+  store i8 1, i8* %arrayidx, align 1
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf, i8* %arrayidx, i64 18, i1 false)
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/DeadStoreElimination/operand-bundles.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/operand-bundles.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/operand-bundles.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/operand-bundles.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+declare noalias i8* @malloc(i64) "malloc-like"
+
+declare void @foo()
+declare void @bar(i8*)
+
+define void @test() {
+  %obj = call i8* @malloc(i64 8)
+  store i8 0, i8* %obj
+  ; Don't remove the store. %obj should be treated as if it will be read by @foo.
+  ; CHECK: store i8 0, i8* %obj
+  call void @foo() ["deopt" (i8* %obj)]
+  ret void
+}
+
+define void @test1() {
+  %obj = call i8* @malloc(i64 8)
+  store i8 0, i8* %obj
+  ; CHECK: store i8 0, i8* %obj
+  call void @bar(i8* nocapture %obj)
+  ret void
+}
+
+define void @test2() {
+  %obj = call i8* @malloc(i64 8)
+  store i8 0, i8* %obj
+  ; CHECK-NOT: store i8 0, i8* %obj
+  call void @foo()
+  ret void
+}
+
+define void @test3() {
+  ; CHECK-LABEL: @test3(
+  %s = alloca i64
+  ; Verify that this first store is not considered killed by the second one
+  ; since it could be observed from the deopt continuation.
+  ; CHECK: store i64 1, i64* %s
+  store i64 1, i64* %s
+  call void @foo() [ "deopt"(i64* %s) ]
+  store i64 0, i64* %s
+  ret void
+}
+
+declare noalias i8* @calloc(i64, i64)
+
+define void @test4() {
+; CHECK-LABEL: @test4
+  %local_obj = call i8* @calloc(i64 1, i64 4)
+  call void @foo() ["deopt" (i8* %local_obj)]
+  store i8 0, i8* %local_obj, align 4
+  ; CHECK-NOT: store i8 0, i8* %local_obj, align 4
+  call void @bar(i8* nocapture %local_obj)
+  ret void
+}

Added: llvm/trunk/test/Transforms/DeadStoreElimination/pr11390.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/pr11390.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/pr11390.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/pr11390.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+; PR11390
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define fastcc void @cat_domain(i8* nocapture %name, i8* nocapture %domain, i8** 
+nocapture %s) nounwind uwtable {
+entry:
+  %call = tail call i64 @strlen(i8* %name) nounwind readonly
+  %call1 = tail call i64 @strlen(i8* %domain) nounwind readonly
+  %add = add i64 %call, 1
+  %add2 = add i64 %add, %call1
+  %add3 = add i64 %add2, 1
+  %call4 = tail call noalias i8* @malloc(i64 %add3) nounwind
+  store i8* %call4, i8** %s, align 8
+  %tobool = icmp eq i8* %call4, null
+  br i1 %tobool, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call4, i8* %name, i64 %call, i1 false)
+  %arrayidx = getelementptr inbounds i8, i8* %call4, i64 %call
+  store i8 46, i8* %arrayidx, align 1
+; CHECK: store i8 46
+  %add.ptr5 = getelementptr inbounds i8, i8* %call4, i64 %add
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %add.ptr5, i8* %domain, i64 %call1, i1 false)
+  %arrayidx8 = getelementptr inbounds i8, i8* %call4, i64 %add2
+  store i8 0, i8* %arrayidx8, align 1
+  br label %return
+
+return:                                           ; preds = %if.end, %entry
+  ret void
+}
+
+declare i64 @strlen(i8* nocapture) nounwind readonly
+
+declare noalias i8* @malloc(i64) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind

Added: llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,897 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.init.trampoline(i8*, i8*, i8*)
+
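+; The first store to %P is dead: it is overwritten by the store of 0 before any read.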
+define void @test1(i32* %Q, i32* %P) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %DEAD = load i32, i32* %Q
+  store i32 %DEAD, i32* %P
+  store i32 0, i32* %P
+  ret void
+}
+
+; PR8576 - Should delete store of 10 even though p/q are may aliases.
+define void @test2(i32 *%p, i32 *%q) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    store i32 20, i32* [[Q:%.*]], align 4
+; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 10, i32* %p, align 4
+  store i32 20, i32* %q, align 4
+  store i32 30, i32* %p, align 4
+  ret void
+}
+
+
+; PR8677
+ at g = global i32 1
+
+define i32 @test3(i32* %g_addr) nounwind {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[G_VALUE:%.*]] = load i32, i32* [[G_ADDR:%.*]], align 4
+; CHECK-NEXT:    store i32 -1, i32* @g, align 4
+; CHECK-NEXT:    store i32 [[G_VALUE]], i32* [[G_ADDR]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* @g, align 4
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %g_value = load i32, i32* %g_addr, align 4
+  store i32 -1, i32* @g, align 4
+  store i32 %g_value, i32* %g_addr, align 4
+  %tmp3 = load i32, i32* @g, align 4
+  ret i32 %tmp3
+}
+
+
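+; A volatile store is not removable, even though it only writes back the value just loaded.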
+define void @test4(i32* %Q) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]]
+; CHECK-NEXT:    store volatile i32 [[A]], i32* [[Q]]
+; CHECK-NEXT:    ret void
+;
+  %a = load i32, i32* %Q
+  store volatile i32 %a, i32* %Q
+  ret void
+}
+
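+; The plain store only writes back the value just loaded from %Q, so it is removed; the volatile load stays.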
+define void @test5(i32* %Q) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[A:%.*]] = load volatile i32, i32* [[Q:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %a = load volatile i32, i32* %Q
+  store i32 %a, i32* %Q
+  ret void
+}
+
+; Should delete store of 10 even though memset is a may-store to P (P and Q may
+; alias).
+define void @test6(i32 *%p, i8 *%q) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false)
+; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 10, i32* %p, align 4       ;; dead.
+  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
+  store i32 30, i32* %p, align 4
+  ret void
+}
+
+; Should delete store of 10 even though memset is a may-store to P (P and Q may
+; alias).
+define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
+; CHECK-LABEL: @test6_atomic(
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
+; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4)
+  store atomic i32 30, i32* %p unordered, align 4
+  ret void
+}
+
+; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
+; alias).
+define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false)
+; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 10, i32* %p, align 4       ;; dead.
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
+  store i32 30, i32* %p, align 4
+  ret void
+}
+
+; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
+; alias).
+define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
+; CHECK-LABEL: @test7_atomic(
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
+; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
+  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4)
+  store atomic i32 30, i32* %p unordered, align 4
+  ret void
+}
+
+; Do not delete stores that are only partially killed.
+define i32 @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[V:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 1234567, i32* [[V]]
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[V]]
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %V = alloca i32
+  store i32 1234567, i32* %V
+  %V2 = bitcast i32* %V to i8*
+  store i8 0, i8* %V2
+  %X = load i32, i32* %V
+  ret i32 %X
+
+}
+
+
+; Test for byval handling.
+%struct.x = type { i32, i32, i32, i32 }
+define void @test9(%struct.x* byval  %a) nounwind  {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    ret void
+;
+  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+  store i32 1, i32* %tmp2, align 4
+  ret void
+}
+
+; Test for inalloca handling.
+define void @test9_2(%struct.x* inalloca  %a) nounwind  {
+; CHECK-LABEL: @test9_2(
+; CHECK-NEXT:    ret void
+;
+  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+  store i32 1, i32* %tmp2, align 4
+  ret void
+}
+
+; va_arg has a fuzzy dependence; the store shouldn't be zapped.
+define double @test10(i8* %X) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i8*
+; CHECK-NEXT:    store i8* [[X:%.*]], i8** [[X_ADDR]]
+; CHECK-NEXT:    [[TMP_0:%.*]] = va_arg i8** [[X_ADDR]], double
+; CHECK-NEXT:    ret double [[TMP_0]]
+;
+  %X_addr = alloca i8*
+  store i8* %X, i8** %X_addr
+  %tmp.0 = va_arg i8** %X_addr, double
+  ret double %tmp.0
+}
+
+
+; DSE should delete the dead trampoline.
+declare void @test11f()
+define void @test11() {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    ret void
+;
+  %storage = alloca [10 x i8], align 16		; <[10 x i8]*> [#uses=1]
+  %cast = getelementptr [10 x i8], [10 x i8]* %storage, i32 0, i32 0		; <i8*> [#uses=1]
+  call void @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null )		; <i8*> [#uses=1]
+  ret void
+}
+
+
+; PR2599 - load -> store to same address.
+define void @test12({ i32, i32 }* %x) nounwind  {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 0, [[TMP8]]
+; CHECK-NEXT:    store i32 [[TMP17]], i32* [[TMP7]], align 4
+; CHECK-NEXT:    ret void
+;
+  %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0
+  %tmp5 = load i32, i32* %tmp4, align 4
+  %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
+  %tmp8 = load i32, i32* %tmp7, align 4
+  %tmp17 = sub i32 0, %tmp8
+  store i32 %tmp5, i32* %tmp4, align 4
+  store i32 %tmp17, i32* %tmp7, align 4
+  ret void
+}
+
+
+; %P doesn't escape; the DEAD instructions should be removed.
+declare void @test13f()
+define i32* @test13() {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[PTR:%.*]] = tail call i8* @malloc(i32 4)
+; CHECK-NEXT:    [[P:%.*]] = bitcast i8* [[PTR]] to i32*
+; CHECK-NEXT:    call void @test13f()
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    ret i32* [[P]]
+;
+  %ptr = tail call i8* @malloc(i32 4)
+  %P = bitcast i8* %ptr to i32*
+  %DEAD = load i32, i32* %P
+  %DEAD2 = add i32 %DEAD, 1
+  store i32 %DEAD2, i32* %P
+  call void @test13f( )
+  store i32 0, i32* %P
+  ret i32* %P
+}
+
+define i32 addrspace(1)* @test13_addrspacecast() {
+; CHECK-LABEL: @test13_addrspacecast(
+; CHECK-NEXT:    [[P:%.*]] = tail call i8* @malloc(i32 4)
+; CHECK-NEXT:    [[P_BC:%.*]] = bitcast i8* [[P]] to i32*
+; CHECK-NEXT:    [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)*
+; CHECK-NEXT:    call void @test13f()
+; CHECK-NEXT:    store i32 0, i32 addrspace(1)* [[P]]
+; CHECK-NEXT:    ret i32 addrspace(1)* [[P]]
+;
+  %p = tail call i8* @malloc(i32 4)
+  %p.bc = bitcast i8* %p to i32*
+  %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
+  %DEAD = load i32, i32 addrspace(1)* %P
+  %DEAD2 = add i32 %DEAD, 1
+  store i32 %DEAD2, i32 addrspace(1)* %P
+  call void @test13f( )
+  store i32 0, i32 addrspace(1)* %P
+  ret i32 addrspace(1)* %P
+}
+
+declare noalias i8* @malloc(i32)
+declare noalias i8* @calloc(i32, i32)
+
+
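+; The store into the never-read local alloca %P is dead (and the load feeding it becomes dead as well).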
+define void @test14(i32* %Q) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    ret void
+;
+  %P = alloca i32
+  %DEAD = load i32, i32* %Q
+  store i32 %DEAD, i32* %P
+  ret void
+
+}
+
+
+; PR8701
+
+;; Fully dead overwrite of memcpy.
+define void @test15(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  ret void
+}
+
+;; Fully dead overwrite of memcpy.
+define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test15_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Fully dead overwrite of memcpy.
+define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test15_atomic_weaker(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Fully dead overwrite of memcpy.
+define void @test15_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test15_atomic_weaker_2(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
+  ret void
+}
+
+;; Full overwrite of smaller memcpy.
+define void @test16(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  ret void
+}
+
+;; Full overwrite of smaller memcpy.
+define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test16_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Full overwrite of smaller memory where overwrite has stronger atomicity
+define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test16_atomic_weaker(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i1 false)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Full overwrite of smaller memory where overwrite has weaker atomicity.
+define void @test16_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test16_atomic_weaker_2(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
+  ret void
+}
+
+;; Overwrite of memset by memcpy.
+define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  ret void
+}
+
+;; Overwrite of memset by memcpy.
+define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp {
+; CHECK-LABEL: @test17_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Overwrite of memset by memcpy. Overwrite is stronger atomicity. We can
+;; remove the memset.
+define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp {
+; CHECK-LABEL: @test17_atomic_weaker(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
+;; Overwrite of memset by memcpy. Overwrite is weaker atomicity. We can remove
+;; the memset.
+define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp {
+; CHECK-LABEL: @test17_atomic_weaker_2(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
+  ret void
+}
+
+; Should not delete the volatile memset.
+define void @test17v(i8* %P, i8* %Q) nounwind ssp {
+; CHECK-LABEL: @test17v(
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  ret void
+}
+
+; PR8728
+; Do not delete the instruction when the possible situation is:
+; A = B
+; A = A
+;
+; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be
+; equal (but not unequal and overlapping).
+define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+  ret void
+}
+
+define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp {
+; CHECK-LABEL: @test18_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
+  ret void
+}
+
+
+; The store here is not dead because the byval call reads it.
+declare void @test19f({i32}* byval align 4 %P)
+
+define void @test19({i32} * nocapture byval align 4 %arg5) nounwind ssp {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds { i32 }, { i32 }* [[ARG5:%.*]], i32 0, i32 0
+; CHECK-NEXT:    store i32 912, i32* [[TMP7]]
+; CHECK-NEXT:    call void @test19f({ i32 }* byval align 4 [[ARG5]])
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp7 = getelementptr inbounds {i32}, {i32}* %arg5, i32 0, i32 0
+  store i32 912, i32* %tmp7
+  call void @test19f({i32}* byval align 4 %arg5)
+  ret void
+
+}
+
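+; Stores into malloc'ed memory that is never read are dead.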
+define void @test20() {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:    ret void
+;
+  %m = call i8* @malloc(i32 24)
+  store i8 0, i8* %m
+  ret void
+}
+
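+; Likewise for calloc'ed memory that is never read.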
+define void @test21() {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:    ret void
+;
+  %m = call i8* @calloc(i32 9, i32 7)
+  store i8 0, i8* %m
+  ret void
+}
+
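+; A store through a select of two non-escaping, never-read allocas is dead.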
+define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT:    ret void
+;
+  %k.addr = alloca i32
+  %m.addr = alloca i32
+  %k.addr.m.addr = select i1 %i, i32* %k.addr, i32* %m.addr
+  store i32 0, i32* %k.addr.m.addr, align 4
+  ret void
+}
+
+; PR13547
+declare noalias i8* @strdup(i8* nocapture) nounwind
+define noalias i8* @test23() nounwind uwtable ssp {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT:    [[X:%.*]] = alloca [2 x i8], align 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 0
+; CHECK-NEXT:    store i8 97, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1
+; CHECK-NEXT:    store i8 0, i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) #1
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+  %x = alloca [2 x i8], align 1
+  %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 0
+  store i8 97, i8* %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 1
+  store i8 0, i8* %arrayidx1, align 1
+  %call = call i8* @strdup(i8* %arrayidx) nounwind
+  ret i8* %call
+}
+
+; Make sure the same-sized store to the later element is deleted.
+define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0
+; CHECK-NEXT:    store i32 [[B:%.*]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
+; CHECK-NEXT:    store i32 [[C:%.*]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
+  store i32 0, i32* %1, align 4
+  %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
+  store i32 0, i32* %2, align 4
+  %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
+  store i32 %b, i32* %3, align 4
+  %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
+  store i32 %c, i32* %4, align 4
+  ret void
+}
+
+; Check another case like PR13547 where strdup is not like malloc.
+define i8* @test25(i8* %p) nounwind {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT:    [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4
+; CHECK-NEXT:    [[TMP:%.*]] = load i8, i8* [[P_4]], align 1
+; CHECK-NEXT:    store i8 0, i8* [[P_4]], align 1
+; CHECK-NEXT:    [[Q:%.*]] = call i8* @strdup(i8* [[P]]) #4
+; CHECK-NEXT:    store i8 [[TMP]], i8* [[P_4]], align 1
+; CHECK-NEXT:    ret i8* [[Q]]
+;
+  %p.4 = getelementptr i8, i8* %p, i64 4
+  %tmp = load i8, i8* %p.4, align 1
+  store i8 0, i8* %p.4, align 1
+  %q = call i8* @strdup(i8* %p) nounwind optsize
+  store i8 %tmp, i8* %p.4, align 1
+  ret i8* %q
+}
+
+; Remove redundant store if loaded value is in another block.
+define i32 @test26(i1 %c, i32* %p) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  store i32 %v, i32* %p, align 4
+  br label %bb3
+bb3:
+  ret i32 0
+}
+
+; Remove redundant store if loaded value is in another block.
+define i32 @test27(i1 %c, i32* %p) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  store i32 %v, i32* %p, align 4
+  ret i32 0
+}
+
+; Don't remove redundant store because of may-aliased store.
+define i32 @test28(i1 %c, i32* %p, i32* %p2, i32 %i) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    store i32 [[I:%.*]], i32* [[P2:%.*]], align 4
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+
+  ; Might overwrite value at %p
+  store i32 %i, i32* %p2, align 4
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  store i32 %v, i32* %p, align 4
+  ret i32 0
+}
+
+; Don't remove redundant store because of may-aliased store.
+define i32 @test29(i1 %c, i32* %p, i32* %p2, i32 %i) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    store i32 [[I:%.*]], i32* [[P2:%.*]], align 4
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  ; Might overwrite value at %p
+  store i32 %i, i32* %p2, align 4
+  br label %bb3
+bb3:
+  store i32 %v, i32* %p, align 4
+  ret i32 0
+}
+
+declare void @unknown_func()
+
+; Don't remove redundant store because of unknown call.
+define i32 @test30(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  ; Might overwrite value at %p
+  call void @unknown_func()
+  br label %bb3
+bb3:
+  store i32 %v, i32* %p, align 4
+  ret i32 0
+}
+
+; Remove redundant store if loaded value is in another block inside a loop.
+define i32 @test31(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br label %bb1
+bb1:
+  store i32 %v, i32* %p, align 4
+  br i1 undef, label %bb1, label %bb2
+bb2:
+  ret i32 0
+}
+
+; Don't remove redundant store in a loop with a may-alias store.
+define i32 @test32(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br label %bb1
+bb1:
+  store i32 %v, i32* %p, align 4
+  ; Might read and overwrite value at %p
+  call void @unknown_func()
+  br i1 undef, label %bb1, label %bb2
+bb2:
+  ret i32 0
+}
+
+; Remove redundant store, which is in the same loop as the load.
+define i32 @test33(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %bb1
+bb1:
+  %v = load i32, i32* %p, align 4
+  br label %bb2
+bb2:
+  store i32 %v, i32* %p, align 4
+  ; Might read and overwrite value at %p, but doesn't matter.
+  call void @unknown_func()
+  br i1 undef, label %bb1, label %bb3
+bb3:
+  ret i32 0
+}
+
+; Don't remove redundant store: unknown_func could unwind
+define void @test34(i32* noalias %p) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT:    store i32 1, i32* [[P:%.*]]
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    ret void
+;
+  store i32 1, i32* %p
+  call void @unknown_func()
+  store i32 0, i32* %p
+  ret void
+}
+
+; Remove redundant store even with an unwinding function in the same block
+define void @test35(i32* noalias %p) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    ret void
+;
+  call void @unknown_func()
+  store i32 1, i32* %p
+  store i32 0, i32* %p
+  ret void
+}
+
+; We cannot optimize away the first memmove since %P could overlap with %Q.
+define void @test36(i8* %P, i8* %Q) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[Q]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  ret void
+}
+
+define void @test36_atomic(i8* %P, i8* %Q) {
+; CHECK-LABEL: @test36_atomic(
+; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  ret void
+}
+
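+; The first memcpy cannot be removed: %R may alias %P, so the memmove may read the bytes the memcpy wrote.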
+define void @test37(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+  ret void
+}
+
+define void @test37_atomic(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test37_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
+  ret void
+}
+
+; Same caveat about memcpy as in @test18 applies here.
+define void @test38(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+  ret void
+}
+
+define void @test38_atomic(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test38_atomic(
+; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
+  ret void
+}
+
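+; The second memcpy overwrites only 8 of the 12 bytes written by the first, so the first is not dead.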
+define void @test39(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
+  ret void
+}
+
+define void @test39_atomic(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test39_atomic(
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 8, i32 1)
+; CHECK-NEXT:    ret void
+;
+
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 8, i32 1)
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)



