[llvm] r358552 - Revert "Temporarily Revert "Add basic loop fusion pass.""

Eric Christopher via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 16 21:53:01 PDT 2019


Added: llvm/trunk/test/Transforms/SROA/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/basictest.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/basictest.ll (added)
+++ llvm/trunk/test/Transforms/SROA/basictest.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,1918 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+; RUN: opt < %s -passes=sroa -S | FileCheck %s
+;
+; The two invocations above feed this file to opt's SROA pass, once spelled
+; "-sroa" and once spelled "-passes=sroa", and match the printed IR against
+; the FileCheck directives embedded in the functions below.
+
+; Data layout: little-endian ("e"); 64-bit pointers in the default address
+; space and 16-bit pointers in address space 1 ("p1:16:16:16"); i64 has a
+; 32-bit ABI alignment and a 64-bit preferred alignment ("i64:32:64").
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+; Lifetime marker intrinsics; test0 uses them to bracket the accesses to each
+; of its allocas.
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+; test0: two scalar allocas (an i32 and a float), each accessed only by one
+; store/load pair that is bracketed by lifetime.start/lifetime.end calls.
+; Expected output: no alloca remains and the function still returns an i32.
+define i32 @test0() {
+; CHECK-LABEL: @test0(
+; CHECK-NOT: alloca
+; CHECK: ret i32
+
+entry:
+  %a1 = alloca i32
+  %a2 = alloca float
+
+  ; The lifetime intrinsics take i8*, hence the bitcast of each alloca.
+  %a1.i8 = bitcast i32* %a1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %a1.i8)
+
+  store i32 0, i32* %a1
+  %v1 = load i32, i32* %a1
+
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %a1.i8)
+
+  %a2.i8 = bitcast float* %a2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %a2.i8)
+
+  store float 0.0, float* %a2
+  %v2 = load float , float * %a2
+  ; Reinterpret the loaded float's bits as an i32 so it can be added to %v1.
+  %v2.int = bitcast float %v2 to i32
+  %sum1 = add i32 %v1, %v2.int
+
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %a2.i8)
+
+  ret i32 %sum1
+}
+
+; test1: an alloca of the struct { i32, float } of which only field 0 is
+; touched (store 0, then load it back).
+; Expected output: no alloca remains and the constant 0 is returned.
+define i32 @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NOT: alloca
+; CHECK: ret i32 0
+
+entry:
+  %X = alloca { i32, float }
+  ; %Y addresses field 0 (the i32) of the struct.
+  %Y = getelementptr { i32, float }, { i32, float }* %X, i64 0, i32 0
+  store i32 0, i32* %Y
+  %Z = load i32, i32* %Y
+  ret i32 %Z
+}
+
+; test2: an i64 is stored into an [8 x i8] alloca through a bitcast pointer
+; and loaded back in a different basic block (%L2).
+; Expected output: no alloca remains and the argument %X is returned directly.
+define i64 @test2(i64 %X) {
+; CHECK-LABEL: @test2(
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
+
+entry:
+  %A = alloca [8 x i8]
+  ; View the 8-byte array as a single i64.
+  %B = bitcast [8 x i8]* %A to i64*
+  store i64 %X, i64* %B
+  br label %L2
+
+L2:
+  %Z = load i64, i64* %B
+  ret i64 %Z
+}
+
+; test3: a 300-byte alloca is filled from %src, partially overwritten by
+; overlapping stores, memcpys and a memset, and finally copied out to %dst.
+; The expected output splits it into seven smaller allocas plus two single
+; bytes that need no alloca at all:
+;   bytes [0, 42)    -> [42 x i8]  (test3_a1)
+;   byte  42         -> promoted   (loaded from %src as test3_r1)
+;   bytes [43, 142)  -> [99 x i8]  (test3_a2)
+;   bytes [142, 158) -> [16 x i8]  (test3_a3)
+;   bytes [158, 200) -> [42 x i8]  (test3_a4)
+;   bytes [200, 207) -> [7 x i8]   (test3_a5)
+;   byte  207        -> promoted   (loaded from %src as test3_r2)
+;   bytes [208, 215) -> [7 x i8]   (test3_a6)
+;   bytes [215, 300) -> [85 x i8]  (test3_a7)
+; %src is declared "align 8"; the alignment expected on each split source
+; pointer is what that guarantee leaves at the piece's byte offset (for
+; example align 2 at offset 142 and align 8 at offset 200).
+;
+; NOTE(review): after TAG_0 is first captured, several later memcpy
+; expectations capture it again with the "TAG_0:!.*" form instead of reusing
+; it, so they do not verify that every piece carries the same tag as the
+; first one -- confirm whether that looseness is intended.
+define void @test3(i8* %dst, i8* align 8 %src) {
+; CHECK-LABEL: @test3(
+
+entry:
+  %a = alloca [300 x i8]
+; CHECK-NOT:  alloca
+; CHECK:      %[[test3_a1:.*]] = alloca [42 x i8]
+; CHECK-NEXT: %[[test3_a2:.*]] = alloca [99 x i8]
+; CHECK-NEXT: %[[test3_a3:.*]] = alloca [16 x i8]
+; CHECK-NEXT: %[[test3_a4:.*]] = alloca [42 x i8]
+; CHECK-NEXT: %[[test3_a5:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test3_a6:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8]
+
+  ; Initialize the whole array from %src; the expected output performs this
+  ; copy piece by piece, one memcpy or scalar load per split range.
+  %b = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* align 8 %src, i32 300, i1 false), !tbaa !0
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %src, i32 42, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42
+; CHECK-NEXT: %[[test3_r1:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 142
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 158
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 200
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 207
+; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 208
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 215
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}), !tbaa [[TAG_0:!.*]]
+
+  ; Clobber a single element of the array; this should be promotable and be
+  ; deleted. No store is expected here in the output: the value 0 only
+  ; reappears in the final copy-out, as the i8 store to %dst offset 42.
+  %c = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 42
+  store i8 0, i8* %c
+
+  ; Make a sequence of overlapping stores to the array. These overlap both in
+  ; forward strides and in shrinking accesses.
+  ; The nine base pointers cover byte offsets 142..150; the widest access (an
+  ; i64 at offset 150) ends at byte 158, so every store below is expected to
+  ; land inside the 16-byte piece test3_a3 at index (offset - 142).
+  %overlap.1.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 142
+  %overlap.2.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 143
+  %overlap.3.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 144
+  %overlap.4.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 145
+  %overlap.5.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 146
+  %overlap.6.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 147
+  %overlap.7.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 148
+  %overlap.8.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 149
+  %overlap.9.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 150
+  %overlap.1.i16 = bitcast i8* %overlap.1.i8 to i16*
+  %overlap.1.i32 = bitcast i8* %overlap.1.i8 to i32*
+  %overlap.1.i64 = bitcast i8* %overlap.1.i8 to i64*
+  %overlap.2.i64 = bitcast i8* %overlap.2.i8 to i64*
+  %overlap.3.i64 = bitcast i8* %overlap.3.i8 to i64*
+  %overlap.4.i64 = bitcast i8* %overlap.4.i8 to i64*
+  %overlap.5.i64 = bitcast i8* %overlap.5.i8 to i64*
+  %overlap.6.i64 = bitcast i8* %overlap.6.i8 to i64*
+  %overlap.7.i64 = bitcast i8* %overlap.7.i8 to i64*
+  %overlap.8.i64 = bitcast i8* %overlap.8.i8 to i64*
+  %overlap.9.i64 = bitcast i8* %overlap.9.i8 to i64*
+  store i8 1, i8* %overlap.1.i8, !tbaa !3
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0
+; CHECK-NEXT: store i8 1, i8* %[[gep]], !tbaa [[TAG_3:!.*]]
+  store i16 1, i16* %overlap.1.i16, !tbaa !5
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i16*
+; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_5:!.*]]
+  store i32 1, i32* %overlap.1.i32, !tbaa !7
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i32*
+; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_7:!.*]]
+  store i64 1, i64* %overlap.1.i64, !tbaa !9
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i64*
+; CHECK-NEXT: store i64 1, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_9:!.*]]
+  store i64 2, i64* %overlap.2.i64, !tbaa !11
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 1
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 2, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_11:!.*]]
+  store i64 3, i64* %overlap.3.i64, !tbaa !13
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 2
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 3, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_13:!.*]]
+  store i64 4, i64* %overlap.4.i64, !tbaa !15
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 3
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 4, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_15:!.*]]
+  store i64 5, i64* %overlap.5.i64, !tbaa !17
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 4
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 5, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_17:!.*]]
+  store i64 6, i64* %overlap.6.i64, !tbaa !19
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 5
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 6, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_19:!.*]]
+  store i64 7, i64* %overlap.7.i64, !tbaa !21
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 6
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 7, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_21:!.*]]
+  store i64 8, i64* %overlap.8.i64, !tbaa !23
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 7
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 8, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_23:!.*]]
+  store i64 9, i64* %overlap.9.i64, !tbaa !25
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 8
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 9, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_25:!.*]]
+
+  ; Make two sequences of overlapping stores with more gaps and irregularities.
+  ; Sequence 1 starts at byte offsets 200..203 (piece test3_a5) and sequence 2
+  ; at byte offsets 208..211 (piece test3_a6).
+  %overlap2.1.0.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 200
+  %overlap2.1.1.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 201
+  %overlap2.1.2.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 202
+  %overlap2.1.3.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 203
+
+  %overlap2.2.0.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 208
+  %overlap2.2.1.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 209
+  %overlap2.2.2.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 210
+  %overlap2.2.3.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 211
+
+  %overlap2.1.0.i16 = bitcast i8* %overlap2.1.0.i8 to i16*
+  %overlap2.1.0.i32 = bitcast i8* %overlap2.1.0.i8 to i32*
+  %overlap2.1.1.i32 = bitcast i8* %overlap2.1.1.i8 to i32*
+  %overlap2.1.2.i32 = bitcast i8* %overlap2.1.2.i8 to i32*
+  %overlap2.1.3.i32 = bitcast i8* %overlap2.1.3.i8 to i32*
+  store i8 1,  i8*  %overlap2.1.0.i8, !tbaa !27
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
+; CHECK-NEXT: store i8 1, i8* %[[gep]], !tbaa [[TAG_27:!.*]]
+  store i16 1, i16* %overlap2.1.0.i16, !tbaa !29
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i16*
+; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_29:!.*]]
+  store i32 1, i32* %overlap2.1.0.i32, !tbaa !31
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i32*
+; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_31:!.*]]
+  store i32 2, i32* %overlap2.1.1.i32, !tbaa !33
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 1
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 2, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_33:!.*]]
+  store i32 3, i32* %overlap2.1.2.i32, !tbaa !35
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 3, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_35:!.*]]
+  store i32 4, i32* %overlap2.1.3.i32, !tbaa !37
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 3
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 4, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_37:!.*]]
+
+  %overlap2.2.0.i32 = bitcast i8* %overlap2.2.0.i8 to i32*
+  %overlap2.2.1.i16 = bitcast i8* %overlap2.2.1.i8 to i16*
+  %overlap2.2.1.i32 = bitcast i8* %overlap2.2.1.i8 to i32*
+  %overlap2.2.2.i32 = bitcast i8* %overlap2.2.2.i8 to i32*
+  %overlap2.2.3.i32 = bitcast i8* %overlap2.2.3.i8 to i32*
+  store i32 1, i32* %overlap2.2.0.i32, !tbaa !39
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a6]] to i32*
+; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_39:!.*]]
+  store i8 1,  i8*  %overlap2.2.1.i8, !tbaa !41
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1
+; CHECK-NEXT: store i8 1, i8* %[[gep]], !tbaa [[TAG_41:!.*]]
+  store i16 1, i16* %overlap2.2.1.i16, !tbaa !43
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_43:!.*]]
+  store i32 1, i32* %overlap2.2.1.i32, !tbaa !45
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_45:!.*]]
+  store i32 3, i32* %overlap2.2.2.i32, !tbaa !47
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 3, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_47:!.*]]
+  store i32 4, i32* %overlap2.2.3.i32, !tbaa !49
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 3
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 4, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_49:!.*]]
+
+  ; %overlap2.prefix is byte offset 201 - 4 = 197, so this 8-byte copy covers
+  ; bytes [197, 205): the last 3 bytes of test3_a4 (index 39..41) followed by
+  ; the first 5 bytes of test3_a5.
+  %overlap2.prefix = getelementptr i8, i8* %overlap2.1.1.i8, i64 -4
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i1 false), !tbaa !51
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 39
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 3, {{.*}}), !tbaa [[TAG_51:!.*]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 3
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 5, {{.*}}), !tbaa [[TAG_51]]
+
+  ; Bridge between the overlapping areas
+  ; The memset covers bytes [202, 210): 5 bytes of test3_a5 (from index 2),
+  ; the promoted byte 207, and the first 2 bytes of test3_a6.
+  call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i1 false), !tbaa !53
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 5, {{.*}}), !tbaa [[TAG_53:!.*]]
+; ...promoted i8 store...
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 2, {{.*}}), !tbaa [[TAG_53]]
+
+  ; Entirely within the second overlap.
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i1 false), !tbaa !55
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5, {{.*}}), !tbaa [[TAG_55:!.*]]
+
+  ; Trailing past the second overlap.
+  ; Bytes [210, 218): the last 5 bytes of test3_a6 (from index 2) and the
+  ; first 3 bytes of test3_a7.
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i1 false), !tbaa !57
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5, {{.*}}), !tbaa [[TAG_57:!.*]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 5
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 3, {{.*}}), !tbaa [[TAG_57]]
+
+  ; Copy the whole array out to %dst. The expected output again works piece by
+  ; piece; the two promoted bytes are written as plain i8 stores of their
+  ; final values (0 at offset 42, 42 at offset 207).
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i1 false), !tbaa !59
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 42, {{.*}}), !tbaa [[TAG_59:!.*]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42
+; CHECK-NEXT: store i8 0, i8* %[[gep]], {{.*}}, !tbaa [[TAG_59]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 142
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 158
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 200
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 207
+; CHECK-NEXT: store i8 42, i8* %[[gep]], {{.*}}, !tbaa [[TAG_59]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 208
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 215
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}), !tbaa [[TAG_59]]
+
+  ret void
+}
+
+; test4: a 100-byte alloca is filled from %src, then bytes [40, 50) are
+; overwritten twice from other parts of the same alloca (first from [20, 30)
+; with memcpy, then from [50, 60) with memmove), and finally the whole array
+; is copied out to %dst.
+; Expected split:
+;   bytes [0, 20)   -> [20 x i8]  (test4_a1)
+;   bytes [20, 30)  -> promoted i16 + promoted i8 + [7 x i8] (test4_a2)
+;   bytes [30, 40)  -> [10 x i8]  (test4_a3)
+;   bytes [40, 50)  -> promoted i16 + promoted i8 + [7 x i8] (test4_a4)
+;   bytes [50, 60)  -> promoted i16 + promoted i8 + [7 x i8] (test4_a5)
+;   bytes [60, 100) -> [40 x i8]  (test4_a6)
+define void @test4(i8* %dst, i8* %src) {
+; CHECK-LABEL: @test4(
+
+entry:
+  %a = alloca [100 x i8]
+; CHECK-NOT:  alloca
+; CHECK:      %[[test4_a1:.*]] = alloca [20 x i8]
+; CHECK-NEXT: %[[test4_a2:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test4_a3:.*]] = alloca [10 x i8]
+; CHECK-NEXT: %[[test4_a4:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test4_a5:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8]
+
+  ; Initialize the whole array from %src; the promoted i16/i8 pieces are
+  ; expected to become scalar loads from %src (test4_r1 .. test4_r6).
+  %b = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i1 false), !tbaa !0
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 20, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 20
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: %[[test4_r1:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 22
+; CHECK-NEXT: %[[test4_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 23
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 30
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 40
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: %[[test4_r3:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42
+; CHECK-NEXT: %[[test4_r4:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 50
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: %[[test4_r5:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 52
+; CHECK-NEXT: %[[test4_r6:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 53
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 60
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}}), !tbaa [[TAG_0]]
+
+  ; Copy bytes [20, 30) onto bytes [40, 50). Only the 7-byte tail is expected
+  ; to remain as a memcpy (test4_a2 -> test4_a4); the leading i16/i8 parts are
+  ; promoted and produce no instruction here.
+  %a.src.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 20
+  %a.dst.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 40
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i1 false), !tbaa !3
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_3]]
+
+  ; Clobber a single element of the array; this should be promotable and be
+  ; deleted.
+  %c = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 42
+  store i8 0, i8* %c
+
+  ; Copy bytes [50, 60) onto bytes [40, 50) with memmove. The expected output
+  ; emits the remaining 7-byte tail copy (test4_a5 -> test4_a4) as a memcpy
+  ; call rather than a memmove.
+  %a.src.2 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 50
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i1 false), !tbaa !5
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_5]]
+
+  ; Copy the whole array out to %dst. Offsets 40 and 42 are expected to
+  ; receive test4_r5 and test4_r6 (the values loaded from %src offsets 50 and
+  ; 52), because the memmove above was the last write to bytes [40, 50).
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i1 false), !tbaa !7
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 20, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 20
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 22
+; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 23
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 30
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 40
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42
+; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 50
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 52
+; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 53
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 60
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}}), !tbaa [[TAG_7]]
+
+  ret void
+}
+
+; Memory intrinsics called by the tests in this file. Each takes an i32
+; length and a trailing i1 "is volatile" flag. The p1i8 memcpy variant takes
+; its destination pointer in address space 1.
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+
+; test5: stores float 0.0 into a 4-byte array and reads back the i16 located
+; at byte offset 2 of that array.
+; Expected output: no "alloca float"; the i16 is extracted from the float's
+; bits with a bitcast to i32, a logical shift right by 16, and a trunc.
+define i16 @test5() {
+; CHECK-LABEL: @test5(
+; CHECK-NOT: alloca float
+; CHECK:      %[[cast:.*]] = bitcast float 0.0{{.*}} to i32
+; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16
+; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16
+; CHECK-NEXT: ret i16 %[[trunc]]
+
+entry:
+  %a = alloca [4 x i8]
+  %fptr = bitcast [4 x i8]* %a to float*
+  store float 0.0, float* %fptr
+  ; %ptr addresses byte 2 of the array, i.e. the second half of the float.
+  %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 2
+  %iptr = bitcast i8* %ptr to i16*
+  %val = load i16, i16* %iptr
+  ret i16 %val
+}
+
+; test6: a volatile memset (final argument "i1 true") of all 4 bytes of an
+; alloca, followed by a plain i32 load of the same bytes.
+; Expected output: an i32 alloca remains, written by a volatile i32 store and
+; read back by an i32 load.
+define i32 @test6() {
+; CHECK-LABEL: @test6(
+; CHECK: alloca i32
+; CHECK-NEXT: store volatile i32
+; CHECK-NEXT: load i32, i32*
+; CHECK-NEXT: ret i32
+
+entry:
+  %a = alloca [4 x i8]
+  %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 0
+  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i1 true)
+  %iptr = bitcast i8* %ptr to i32*
+  %val = load i32, i32* %iptr
+  ret i32 %val
+}
+
+; test7: two volatile 4-byte memcpys that route data through an alloca
+; (%src -> %a, then %a -> %dst).
+; Expected output: an i32 alloca remains, and each memcpy becomes a volatile
+; i32 load followed by a volatile i32 store, both carrying that memcpy's
+; !tbaa tag (TAG_0 for the first copy, TAG_3 for the second).
+define void @test7(i8* %src, i8* %dst) {
+; CHECK-LABEL: @test7(
+; CHECK: alloca i32
+; CHECK-NEXT: bitcast i8* %src to i32*
+; CHECK-NEXT: load volatile i32, {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: store volatile i32 {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: bitcast i8* %dst to i32*
+; CHECK-NEXT: load volatile i32, {{.*}}, !tbaa [[TAG_3]]
+; CHECK-NEXT: store volatile i32 {{.*}}, !tbaa [[TAG_3]]
+; CHECK-NEXT: ret
+
+entry:
+  %a = alloca [4 x i8]
+  %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 true), !tbaa !0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 true), !tbaa !3
+  ret void
+}
+
+
+; Struct types used by test8 and test10 below. %S2 holds a pointer to an %S1
+; (field 0) and a pointer to another %S2 (field 1).
+%S1 = type { i32, i32, [16 x i8] }
+%S2 = type { %S1*, %S2* }
+
+; test8: loads the %S2 that field 1 of %s2 points to, copies both of its
+; pointer fields into a local %S2 alloca, then reads the local copy back and
+; returns it by value.
+; Expected output: no alloca; the returned aggregate is built with
+; insertvalue directly from the two loaded pointers, and the remaining loads
+; keep their original !tbaa tags.
+define %S2 @test8(%S2* %s2) {
+; CHECK-LABEL: @test8(
+entry:
+  %new = alloca %S2
+; CHECK-NOT: alloca
+
+  ; %s2.next = the pointer stored in field 1 of %s2.
+  %s2.next.ptr = getelementptr %S2, %S2* %s2, i64 0, i32 1
+  %s2.next = load %S2*, %S2** %s2.next.ptr, !tbaa !0
+; CHECK:      %[[gep:.*]] = getelementptr %S2, %S2* %s2, i64 0, i32 1
+; CHECK-NEXT: %[[next:.*]] = load %S2*, %S2** %[[gep]], !tbaa [[TAG_0]]
+
+  ; Copy field 0 and field 1 of %s2.next into %new. The stores into %new are
+  ; expected to disappear; only the two loads from %s2.next remain.
+  %s2.next.s1.ptr = getelementptr %S2, %S2* %s2.next, i64 0, i32 0
+  %s2.next.s1 = load %S1*, %S1** %s2.next.s1.ptr, !tbaa !3
+  %new.s1.ptr = getelementptr %S2, %S2* %new, i64 0, i32 0
+  store %S1* %s2.next.s1, %S1** %new.s1.ptr, !tbaa !5
+  %s2.next.next.ptr = getelementptr %S2, %S2* %s2.next, i64 0, i32 1
+  %s2.next.next = load %S2*, %S2** %s2.next.next.ptr, !tbaa !7
+  %new.next.ptr = getelementptr %S2, %S2* %new, i64 0, i32 1
+  store %S2* %s2.next.next, %S2** %new.next.ptr, !tbaa !9
+; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2, %S2* %[[next]], i64 0, i32 0
+; CHECK-NEXT: %[[next_s1:.*]] = load %S1*, %S1** %[[gep]], !tbaa [[TAG_3]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2, %S2* %[[next]], i64 0, i32 1
+; CHECK-NEXT: %[[next_next:.*]] = load %S2*, %S2** %[[gep]], !tbaa [[TAG_7]]
+
+  ; Read both fields back from %new and assemble the return value.
+  %new.s1 = load %S1*, %S1** %new.s1.ptr
+  %result1 = insertvalue %S2 undef, %S1* %new.s1, 0
+; CHECK-NEXT: %[[result1:.*]] = insertvalue %S2 undef, %S1* %[[next_s1]], 0
+  %new.next = load %S2*, %S2** %new.next.ptr
+  %result2 = insertvalue %S2 %result1, %S2* %new.next, 1
+; CHECK-NEXT: %[[result2:.*]] = insertvalue %S2 %[[result1]], %S2* %[[next_next]], 1
+  ret %S2 %result2
+; CHECK-NEXT: ret %S2 %[[result2]]
+}
+
+define i64 @test9() {
+; Ensure we can handle loads off the end of an alloca even when wrapped in
+; weird bit casts and types. This is valid IR due to the alignment and masking
+; off the bits past the end of the alloca.
+;
+; The expected output has no alloca: it inserts the three stored bytes into
+; an undef i64 with zext/shl/and/or sequences (byte 2 first, then byte 1,
+; then byte 0) and then applies the final 24-bit mask.
+;
+; CHECK-LABEL: @test9(
+; CHECK-NOT: alloca
+; CHECK:      %[[b2:.*]] = zext i8 26 to i64
+; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16
+; CHECK-NEXT: %[[m2:.*]] = and i64 undef, -16711681
+; CHECK-NEXT: %[[i2:.*]] = or i64 %[[m2]], %[[s2]]
+; CHECK-NEXT: %[[b1:.*]] = zext i8 0 to i64
+; CHECK-NEXT: %[[s1:.*]] = shl i64 %[[b1]], 8
+; CHECK-NEXT: %[[m1:.*]] = and i64 %[[i2]], -65281
+; CHECK-NEXT: %[[i1:.*]] = or i64 %[[m1]], %[[s1]]
+; CHECK-NEXT: %[[b0:.*]] = zext i8 0 to i64
+; CHECK-NEXT: %[[m0:.*]] = and i64 %[[i1]], -256
+; CHECK-NEXT: %[[i0:.*]] = or i64 %[[m0]], %[[b0]]
+; CHECK-NEXT: %[[result:.*]] = and i64 %[[i0]], 16777215
+; CHECK-NEXT: ret i64 %[[result]]
+
+entry:
+  %a = alloca { [3 x i8] }, align 8
+  ; Write the three bytes of the struct one at a time: 0, 0, 26.
+  %gep1 = getelementptr inbounds { [3 x i8] }, { [3 x i8] }* %a, i32 0, i32 0, i32 0
+  store i8 0, i8* %gep1, align 1
+  %gep2 = getelementptr inbounds { [3 x i8] }, { [3 x i8] }* %a, i32 0, i32 0, i32 1
+  store i8 0, i8* %gep2, align 1
+  %gep3 = getelementptr inbounds { [3 x i8] }, { [3 x i8] }* %a, i32 0, i32 0, i32 2
+  store i8 26, i8* %gep3, align 1
+  ; Load 8 bytes through a { i64 }* view of the 3-byte struct, then keep only
+  ; the low 24 bits (16777215 = 0xFFFFFF).
+  %cast = bitcast { [3 x i8] }* %a to { i64 }*
+  %elt = getelementptr inbounds { i64 }, { i64 }* %cast, i32 0, i32 0
+  %load = load i64, i64* %elt
+  %result = and i64 %load, 16777215
+  ret i64 %result
+}
+
+define %S2* @test10() {
+; CHECK-LABEL: @test10(
+; CHECK-NOT: alloca %S2*
+; CHECK: ret %S2* null
+
+entry:
+  %a = alloca [8 x i8]
+  %ptr = getelementptr [8 x i8], [8 x i8]* %a, i32 0, i32 0
+  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i1 false)  ; zero all 8 bytes of %a
+  %s2ptrptr = bitcast i8* %ptr to %S2**
+  %s2ptr = load %S2*, %S2** %s2ptrptr  ; read the zeroed bytes back as a pointer
+  ret %S2* %s2ptr
+}
+
+define i32 @test11() {
+; CHECK-LABEL: @test11(
+; CHECK-NOT: alloca
+; CHECK: ret i32 0
+
+entry:
+  %X = alloca i32
+  br i1 undef, label %good, label %bad
+
+good:
+  %Y = getelementptr i32, i32* %X, i64 0  ; in-bounds access to the alloca
+  store i32 0, i32* %Y
+  %Z = load i32, i32* %Y
+  ret i32 %Z
+
+bad:
+  %Y2 = getelementptr i32, i32* %X, i64 1  ; one element past the single-i32 alloca
+  store i32 0, i32* %Y2
+  %Z2 = load i32, i32* %Y2
+  ret i32 %Z2
+}
+
+define i8 @test12() {
+; We fully promote these to the i24 load or store size, resulting in just masks
+; and other operations that instcombine will fold, but no alloca.
+;
+; CHECK-LABEL: @test12(
+
+entry:
+  %a = alloca [3 x i8]
+  %b = alloca [3 x i8]
+; CHECK-NOT: alloca
+
+  %a0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0
+  store i8 0, i8* %a0ptr
+  %a1ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 1
+  store i8 0, i8* %a1ptr
+  %a2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2
+  store i8 0, i8* %a2ptr
+  %aiptr = bitcast [3 x i8]* %a to i24*
+  %ai = load i24, i24* %aiptr  ; read all three bytes of %a as one i24
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16
+; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535
+; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[shift2]]
+; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
+; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
+; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], -256
+; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[ext0]]
+
+  %biptr = bitcast [3 x i8]* %b to i24*
+  store i24 %ai, i24* %biptr  ; write all three bytes of %b with one i24 store
+  %b0ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 0
+  %b0 = load i8, i8* %b0ptr
+  %b1ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 1
+  %b1 = load i8, i8* %b1ptr
+  %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2
+  %b2 = load i8, i8* %b2ptr
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK:      %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
+; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
+; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[insert0]], 16
+; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
+
+  %bsum0 = add i8 %b0, %b1
+  %bsum1 = add i8 %bsum0, %b2
+  ret i8 %bsum1
+; CHECK:      %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
+; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
+; CHECK-NEXT: ret i8 %[[sum1]]
+}
+
+define i32 @test13() {
+; Ensure we don't crash and handle undefined loads that straddle the end of the
+; allocation.
+; CHECK-LABEL: @test13(
+; CHECK:      %[[value:.*]] = zext i8 0 to i16
+; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32
+; CHECK-NEXT: ret i32 %[[ret]]
+
+entry:
+  %a = alloca [3 x i8], align 2
+  %b0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0
+  store i8 0, i8* %b0ptr
+  %b1ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 1
+  store i8 0, i8* %b1ptr
+  %b2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2
+  store i8 0, i8* %b2ptr
+  %iptrcast = bitcast [3 x i8]* %a to i16*
+  %iptrgep = getelementptr i16, i16* %iptrcast, i64 1  ; byte offset 2: the i16 spans bytes 2-3 of the 3-byte alloca
+  %i = load i16, i16* %iptrgep
+  %ret = zext i16 %i to i32
+  ret i32 %ret
+}
+
+%test14.struct = type { [3 x i32] }
+
+define void @test14(...) nounwind uwtable {
+; This is a strange case where we split allocas into promotable partitions, but
+; also gain enough data to prove they must be dead allocas due to GEPs that walk
+; across two adjacent allocas. Test that we don't try to promote or otherwise
+; do bad things to these dead allocas; they should just be removed.
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+
+entry:
+  %a = alloca %test14.struct
+  %p = alloca %test14.struct*
+  %0 = bitcast %test14.struct* %a to i8*
+  %1 = getelementptr i8, i8* %0, i64 12  ; 12 bytes = size of [3 x i32], i.e. just past the end of %a
+  %2 = bitcast i8* %1 to %test14.struct*
+  %3 = getelementptr inbounds %test14.struct, %test14.struct* %2, i32 0, i32 0
+  %4 = getelementptr inbounds %test14.struct, %test14.struct* %a, i32 0, i32 0
+  %5 = bitcast [3 x i32]* %3 to i32*
+  %6 = bitcast [3 x i32]* %4 to i32*
+  %7 = load i32, i32* %6, align 4
+  store i32 %7, i32* %5, align 4
+  %8 = getelementptr inbounds i32, i32* %5, i32 1
+  %9 = getelementptr inbounds i32, i32* %6, i32 1
+  %10 = load i32, i32* %9, align 4
+  store i32 %10, i32* %8, align 4
+  %11 = getelementptr inbounds i32, i32* %5, i32 2
+  %12 = getelementptr inbounds i32, i32* %6, i32 2
+  %13 = load i32, i32* %12, align 4
+  store i32 %13, i32* %11, align 4
+  ret void
+}
+
+define i32 @test15(i1 %flag) nounwind uwtable {
+; Ensure that when there are dead instructions using an alloca that are not
+; loads or stores we still delete them during partitioning and rewriting.
+; Otherwise we'll go to promote them while they still have unpromotable uses.
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   br label %loop
+; CHECK:      loop:
+; CHECK-NEXT:   br label %loop
+
+entry:
+  %l0 = alloca i64
+  %l1 = alloca i64
+  %l2 = alloca i64
+  %l3 = alloca i64
+  br label %loop
+
+loop:
+  %dead3 = phi i8* [ %gep3, %loop ], [ null, %entry ]
+
+  store i64 1879048192, i64* %l0, align 8
+  %bc0 = bitcast i64* %l0 to i8*
+  %gep0 = getelementptr i8, i8* %bc0, i64 3
+  %dead0 = bitcast i8* %gep0 to i64*
+
+  store i64 1879048192, i64* %l1, align 8
+  %bc1 = bitcast i64* %l1 to i8*
+  %gep1 = getelementptr i8, i8* %bc1, i64 3
+  %dead1 = getelementptr i8, i8* %gep1, i64 1
+
+  store i64 1879048192, i64* %l2, align 8
+  %bc2 = bitcast i64* %l2 to i8*
+  %gep2.1 = getelementptr i8, i8* %bc2, i64 1
+  %gep2.2 = getelementptr i8, i8* %bc2, i64 3
+  ; Note that this select should get visited multiple times due to using two
+  ; different GEPs off the same alloca. We should only delete it once.
+  %dead2 = select i1 %flag, i8* %gep2.1, i8* %gep2.2
+
+  store i64 1879048192, i64* %l3, align 8
+  %bc3 = bitcast i64* %l3 to i8*
+  %gep3 = getelementptr i8, i8* %bc3, i64 3
+
+  br label %loop
+}
+
+define void @test16(i8* %src, i8* %dst) {
+; Ensure that we can promote an alloca of [3 x i8] to an i24 SSA value.
+; CHECK-LABEL: @test16(
+; CHECK-NOT: alloca
+; CHECK:      %[[srccast:.*]] = bitcast i8* %src to i24*
+; CHECK-NEXT: load i24, i24* %[[srccast]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[dstcast:.*]] = bitcast i8* %dst to i24*
+; CHECK-NEXT: store i24 0, i24* %[[dstcast]], {{.*}}, !tbaa [[TAG_5]]
+; CHECK-NEXT: ret void
+
+entry:
+  %a = alloca [3 x i8]
+  %ptr = getelementptr [3 x i8], [3 x i8]* %a, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 false), !tbaa !0  ; 4-byte copy into the 3-byte alloca
+  %cast = bitcast i8* %ptr to i24*
+  store i24 0, i24* %cast, !tbaa !3  ; overwrites all three bytes of %a
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 false), !tbaa !5  ; 4-byte copy out of the 3-byte alloca
+  ret void
+}
+
+define void @test17(i8* %src, i8* %dst) {
+; Ensure that we can rewrite unpromotable memcpys which extend past the end of
+; the alloca.
+; CHECK-LABEL: @test17(
+; CHECK:      %[[a:.*]] = alloca [3 x i8]
+; CHECK-NEXT: %[[ptr:.*]] = getelementptr [3 x i8], [3 x i8]* %[[a]], i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[ptr]], i8* %src, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[ptr]], {{.*}}), !tbaa [[TAG_3]]
+; CHECK-NEXT: ret void
+
+entry:
+  %a = alloca [3 x i8]
+  %ptr = getelementptr [3 x i8], [3 x i8]* %a, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i1 true), !tbaa !0  ; volatile (i1 true), 4 bytes into a 3-byte alloca
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i1 true), !tbaa !3  ; volatile (i1 true), 4 bytes out of a 3-byte alloca
+  ret void
+}
+
+define void @test18(i8* %src, i8* %dst, i32 %size) {
+; Preserve transfer intrinsics with a variable size, even if they overlap with
+; fixed size operations. Further, continue to split and promote allocas preceding
+; the variable sized intrinsic.
+; CHECK-LABEL: @test18(
+; CHECK:      %[[a:.*]] = alloca [34 x i8]
+; CHECK:      %[[srcgep1:.*]] = getelementptr inbounds i8, i8* %src, i64 4
+; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32*
+; CHECK-NEXT: %[[srcload:.*]] = load i32, i32* %[[srccast1]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[agep1]], i8* %src, i32 %size, {{.*}}), !tbaa [[TAG_3]]
+; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[agep2]], i8 42, i32 %size, {{.*}}), !tbaa [[TAG_5]]
+; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32*
+; CHECK-NEXT: store i32 42, i32* %[[dstcast1]], {{.*}}, !tbaa [[TAG_9]]
+; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8, i8* %dst, i64 4
+; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32*
+; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]], {{.*}}, !tbaa [[TAG_9]]
+; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* align 1 %[[agep3]], i32 %size, {{.*}}), !tbaa [[TAG_11]]
+; CHECK-NEXT: ret void
+
+entry:
+  %a = alloca [42 x i8]
+  %ptr = getelementptr [42 x i8], [42 x i8]* %a, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i1 false), !tbaa !0
+  %ptr2 = getelementptr [42 x i8], [42 x i8]* %a, i32 0, i32 8
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i1 false), !tbaa !3
+  call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i1 false), !tbaa !5
+  %cast = bitcast i8* %ptr to i32*
+  store i32 42, i32* %cast, !tbaa !7
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i1 false), !tbaa !9
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i1 false), !tbaa !11
+  ret void
+}
+
+%opaque = type opaque
+
+define i32 @test19(%opaque* %x) {
+; This input will cause us to try to compute a natural GEP when rewriting
+; pointers in such a way that we try to GEP through the opaque type. Previously,
+; a check for an unsized type was missing and this crashed. Ensure it behaves
+; reasonably now.
+; CHECK-LABEL: @test19(
+; CHECK-NOT: alloca
+; CHECK: ret i32 undef
+
+entry:
+  %a = alloca { i64, i8* }
+  %cast1 = bitcast %opaque* %x to i8*
+  %cast2 = bitcast { i64, i8* }* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i1 false)  ; source pointer originates from the opaque-typed argument
+  %gep = getelementptr inbounds { i64, i8* }, { i64, i8* }* %a, i32 0, i32 0
+  %val = load i64, i64* %gep  ; loaded value is never used
+  ret i32 undef
+}
+
+define i32 @test20() {
+; Ensure we can track negative offsets (before the beginning of the alloca) and
+; negative relative offsets from offsets starting past the end of the alloca.
+; CHECK-LABEL: @test20(
+; CHECK-NOT: alloca
+; CHECK: %[[sum1:.*]] = add i32 1, 2
+; CHECK: %[[sum2:.*]] = add i32 %[[sum1]], 3
+; CHECK: ret i32 %[[sum2]]
+
+entry:
+  %a = alloca [3 x i32]
+  %gep1 = getelementptr [3 x i32], [3 x i32]* %a, i32 0, i32 0
+  store i32 1, i32* %gep1
+  %gep2.1 = getelementptr [3 x i32], [3 x i32]* %a, i32 0, i32 -2
+  %gep2.2 = getelementptr i32, i32* %gep2.1, i32 3  ; -2 + 3 = element 1
+  store i32 2, i32* %gep2.2
+  %gep3.1 = getelementptr [3 x i32], [3 x i32]* %a, i32 0, i32 14
+  %gep3.2 = getelementptr i32, i32* %gep3.1, i32 -12  ; 14 - 12 = element 2
+  store i32 3, i32* %gep3.2
+
+  %load1 = load i32, i32* %gep1
+  %load2 = load i32, i32* %gep2.2
+  %load3 = load i32, i32* %gep3.2
+  %sum1 = add i32 %load1, %load2
+  %sum2 = add i32 %sum1, %load3
+  ret i32 %sum2
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+define i8 @test21() {
+; Test allocations and offsets which border on overflow of the int64_t used
+; internally. This is awkward to test thoroughly, as LLVM doesn't cleanly
+; support such extreme constructs.
+; CHECK-LABEL: @test21(
+; CHECK-NOT: alloca
+; CHECK: or i8 -1, -1
+
+entry:
+  %a = alloca [2305843009213693951 x i8]
+  %gep0 = getelementptr [2305843009213693951 x i8], [2305843009213693951 x i8]* %a, i64 0, i64 2305843009213693949
+  store i8 255, i8* %gep0
+  %gep1 = getelementptr [2305843009213693951 x i8], [2305843009213693951 x i8]* %a, i64 0, i64 -9223372036854775807
+  %gep2 = getelementptr i8, i8* %gep1, i64 -1
+  call void @llvm.memset.p0i8.i64(i8* %gep2, i8 0, i64 18446744073709551615, i1 false)
+  %gep3 = getelementptr i8, i8* %gep1, i64 9223372036854775807
+  %gep4 = getelementptr i8, i8* %gep3, i64 9223372036854775807
+  %gep5 = getelementptr i8, i8* %gep4, i64 -6917529027641081857
+  store i8 255, i8* %gep5
+  %cast1 = bitcast i8* %gep4 to i32*
+  store i32 0, i32* %cast1
+  %load = load i8, i8* %gep0
+  %gep6 = getelementptr i8, i8* %gep0, i32 1
+  %load2 = load i8, i8* %gep6
+  %result = or i8 %load, %load2
+  ret i8 %result
+}
+
+%PR13916.struct = type { i8 }
+
+define void @PR13916.1() {
+; Ensure that we handle overlapping memcpy intrinsics correctly, especially in
+; the case where there is a directly identical value for both source and dest.
+; CHECK: @PR13916.1
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+entry:
+  %a = alloca i8
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i1 false)  ; source and destination are the same value
+  %tmp2 = load i8, i8* %a
+  ret void
+}
+
+define void @PR13916.2() {
+; Check whether we continue to handle them correctly when they start off with
+; different pointer value chains, but during rewriting we coalesce them into the
+; same value.
+; CHECK: @PR13916.2
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+entry:
+  %a = alloca %PR13916.struct, align 1
+  br i1 undef, label %if.then, label %if.end
+
+if.then:
+  %tmp0 = bitcast %PR13916.struct* %a to i8*
+  %tmp1 = bitcast %PR13916.struct* %a to i8*  ; second, separate cast of the same alloca
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i1 false)
+  br label %if.end
+
+if.end:
+  %gep = getelementptr %PR13916.struct, %PR13916.struct* %a, i32 0, i32 0
+  %tmp2 = load i8, i8* %gep
+  ret void
+}
+
+define void @PR13990() {
+; Ensure we can handle cases where processing one alloca causes the other
+; alloca to become dead and get deleted. This might crash or fail under
+; Valgrind if we regress.
+; CHECK-LABEL: @PR13990(
+; CHECK-NOT: alloca
+; CHECK: unreachable
+; CHECK: unreachable
+
+entry:
+  %tmp1 = alloca i8*
+  %tmp2 = alloca i8*
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  store i8* undef, i8** %tmp2
+  br i1 undef, label %bb2, label %bb3
+
+bb2:
+  %tmp50 = select i1 undef, i8** %tmp2, i8** %tmp1  ; unused select that ties the two allocas together
+  br i1 undef, label %bb3, label %bb4
+
+bb3:
+  unreachable
+
+bb4:
+  unreachable
+}
+
+define double @PR13969(double %x) {
+; Check that we detect when promotion will un-escape an alloca and iterate to
+; re-try running SROA over that alloca. Without that, the two allocas that are
+; stored into a dead alloca don't get rewritten and promoted.
+; CHECK-LABEL: @PR13969(
+
+entry:
+  %a = alloca double
+  %b = alloca double*
+  %c = alloca double
+; CHECK-NOT: alloca
+
+  store double %x, double* %a
+  store double* %c, double** %b  ; %b is only ever stored to, never loaded
+  store double* %a, double** %b
+  store double %x, double* %c
+  %ret = load double, double* %a
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+  ret double %ret
+; CHECK: ret double %x
+}
+
+%PR14034.struct = type { { {} }, i32, %PR14034.list }
+%PR14034.list = type { %PR14034.list*, %PR14034.list* }
+
+define void @PR14034() {
+; This test case tries to form GEPs into the empty leading struct members, and
+; subsequently crashed (under valgrind) before we fixed the PR. The important
+; thing is to handle empty structs gracefully.
+; CHECK-LABEL: @PR14034(
+
+entry:
+  %a = alloca %PR14034.struct
+  %list = getelementptr %PR14034.struct, %PR14034.struct* %a, i32 0, i32 2
+  %prev = getelementptr %PR14034.list, %PR14034.list* %list, i32 0, i32 1
+  store %PR14034.list* undef, %PR14034.list** %prev
+  %cast0 = bitcast %PR14034.struct* undef to i8*
+  %cast1 = bitcast %PR14034.struct* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i1 false)  ; copies the first 12 bytes of %a to an undef destination
+  ret void
+}
+
+define i32 @test22(i32 %x) {
+; Test that SROA and promotion are not confused by a grab-bag mixture of pointer
+; types involving wrapper aggregates and zero-length aggregate members.
+; CHECK-LABEL: @test22(
+
+entry:
+  %a1 = alloca { { [1 x { i32 }] } }
+  %a2 = alloca { {}, { float }, [0 x i8] }
+  %a3 = alloca { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }
+; CHECK-NOT: alloca
+
+  %wrap1 = insertvalue [1 x { i32 }] undef, i32 %x, 0, 0
+  %gep1 = getelementptr { { [1 x { i32 }] } }, { { [1 x { i32 }] } }* %a1, i32 0, i32 0, i32 0
+  store [1 x { i32 }] %wrap1, [1 x { i32 }]* %gep1
+
+  %gep2 = getelementptr { { [1 x { i32 }] } }, { { [1 x { i32 }] } }* %a1, i32 0, i32 0
+  %ptrcast1 = bitcast { [1 x { i32 }] }* %gep2 to { [1 x { float }] }*
+  %load1 = load { [1 x { float }] }, { [1 x { float }] }* %ptrcast1
+  %unwrap1 = extractvalue { [1 x { float }] } %load1, 0, 0
+
+  %wrap2 = insertvalue { {}, { float }, [0 x i8] } undef, { float } %unwrap1, 1
+  store { {}, { float }, [0 x i8] } %wrap2, { {}, { float }, [0 x i8] }* %a2
+
+  %gep3 = getelementptr { {}, { float }, [0 x i8] }, { {}, { float }, [0 x i8] }* %a2, i32 0, i32 1, i32 0
+  %ptrcast2 = bitcast float* %gep3 to <4 x i8>*
+  %load3 = load <4 x i8>, <4 x i8>* %ptrcast2
+  %valcast1 = bitcast <4 x i8> %load3 to i32
+
+  %wrap3 = insertvalue [1 x [1 x i32]] undef, i32 %valcast1, 0, 0
+  %wrap4 = insertvalue { [1 x [1 x i32]], {} } undef, [1 x [1 x i32]] %wrap3, 0
+  %gep4 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }, { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1
+  %ptrcast3 = bitcast { [0 x double], [1 x [1 x <4 x i8>]], {} }* %gep4 to { [1 x [1 x i32]], {} }*
+  store { [1 x [1 x i32]], {} } %wrap4, { [1 x [1 x i32]], {} }* %ptrcast3
+
+  %gep5 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }, { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1, i32 1, i32 0
+  %ptrcast4 = bitcast [1 x <4 x i8>]* %gep5 to { {}, float, {} }*
+  %load4 = load { {}, float, {} }, { {}, float, {} }* %ptrcast4
+  %unwrap2 = extractvalue { {}, float, {} } %load4, 1
+  %valcast2 = bitcast float %unwrap2 to i32
+
+  ret i32 %valcast2
+; CHECK: ret i32
+}
+
+define void @PR14059.1(double* %d) {
+; In PR14059 a peculiar construct was identified as something that is used
+; pervasively in ARM's ABI-calling-convention lowering: the passing of a struct
+; of doubles via an array of i32 in order to place the data into integer
+; registers. This in turn was missed as an optimization by SROA due to the
+; partial loads and stores of integers to the double alloca we were trying to
+; form and promote. The solution is to widen the integer operations to be
+; whole-alloca operations, and perform the appropriate bitcasting on the
+; *values* rather than the pointers. When this works, partial reads and writes
+; via integers can be promoted away.
+; CHECK: @PR14059.1
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+entry:
+  %X.sroa.0.i = alloca double, align 8
+  %0 = bitcast double* %X.sroa.0.i to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0)
+
+  ; Store to the low 32-bits...
+  %X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32*
+  store i32 0, i32* %X.sroa.0.0.cast2.i, align 8
+
+  ; Also use a memset to the middle 32-bits (bytes 2-5) for fun.
+  %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8, i8* %0, i32 2
+  call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i1 false)
+
+  ; Or a memset of the whole thing.
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i1 false)
+
+  ; Write to the high 32-bits with a memcpy.
+  %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8, i8* %0, i32 4
+  %d.raw = bitcast double* %d to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i1 false)
+
+  ; Store to the high 32-bits (1072693248 = 0x3FF00000)...
+  %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
+  store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
+
+  ; Do the actual math...
+  %X.sroa.0.0.load1.i = load double, double* %X.sroa.0.i, align 8
+  %accum.real.i = load double, double* %d, align 8
+  %add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
+  store double %add.r.i, double* %d, align 8
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0)
+  ret void
+}
+
+define i64 @PR14059.2({ float, float }* %phi) {
+; Check that SROA can split up alloca-wide integer loads and stores where the
+; underlying alloca has smaller components that are accessed independently. This
+; shows up particularly with ABI lowering patterns coming out of Clang that rely
+; on the particular register placement of a single large integer return value.
+; CHECK: @PR14059.2
+
+entry:
+  %retval = alloca { float, float }, align 4
+  ; CHECK-NOT: alloca
+
+  %0 = bitcast { float, float }* %retval to i64*
+  store i64 0, i64* %0  ; alloca-wide integer store covering both floats
+  ; CHECK-NOT: store
+
+  %phi.realp = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 0
+  %phi.real = load float, float* %phi.realp
+  %phi.imagp = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 1
+  %phi.imag = load float, float* %phi.imagp
+  ; CHECK:      %[[realp:.*]] = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 0
+  ; CHECK-NEXT: %[[real:.*]] = load float, float* %[[realp]]
+  ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 1
+  ; CHECK-NEXT: %[[imag:.*]] = load float, float* %[[imagp]]
+
+  %real = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 0
+  %imag = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1
+  store float %phi.real, float* %real
+  store float %phi.imag, float* %imag
+  ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
+  ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
+  ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
+  ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
+  ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
+  ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
+  ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
+  ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
+  ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
+
+  %1 = load i64, i64* %0, align 1  ; reload both floats as a single i64
+  ret i64 %1
+  ; CHECK-NEXT: ret i64 %[[real_insert]]
+}
+
+define void @PR14105({ [16 x i8] }* %ptr) {
+; Ensure that when rewriting the GEP index '-1' for this alloca we preserve its
+; sign as negative. We use a volatile memcpy to ensure promotion never actually
+; occurs.
+; CHECK-LABEL: @PR14105(
+
+entry:
+  %a = alloca { [16 x i8] }, align 8
+; CHECK: alloca [16 x i8], align 8
+
+  %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] }* %ptr, i64 -1
+; CHECK-NEXT: getelementptr inbounds { [16 x i8] }, { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0
+
+  %cast1 = bitcast { [16 x i8 ] }* %gep to i8*
+  %cast2 = bitcast { [16 x i8 ] }* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true)
+  ret void
+; CHECK: ret
+}
+
+define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) {
+; Make sure that the right address space pointer is used for the type check.
+; CHECK-LABEL: @PR14105_as1(
+
+entry:
+  %a = alloca { [16 x i8] }, align 8
+; CHECK: alloca [16 x i8], align 8
+
+  %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1
+; CHECK-NEXT: getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i16 0
+
+  %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)*
+  %cast2 = bitcast { [16 x i8 ] }* %a to i8*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true)
+  ret void
+; CHECK: ret
+}
+
+define void @PR14465() {
+; Ensure that we don't crash when analyzing an alloca larger than the maximum
+; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1).
+; CHECK-LABEL: @PR14465(
+
+  %stack = alloca [1048576 x i32], align 16
+; CHECK: alloca [1048576 x i32]
+  %cast = bitcast [1048576 x i32]* %stack to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 16 %cast, i8 -2, i64 4194304, i1 false)
+  ret void
+; CHECK: ret
+}
+
+define void @PR14548(i1 %x) {
+; Handle a mixture of i1 and i8 loads and stores to allocas. This particular
+; pattern caused crashes and invalid output in the PR, and its nature will
+; trigger a mixture in several permutations as we resolve each alloca
+; iteratively.
+; Note that we don't do a particularly good *job* of handling these mixtures,
+; but the hope is that this is very rare.
+; CHECK-LABEL: @PR14548(
+
+entry:
+  %a = alloca <{ i1 }>, align 8
+  %b = alloca <{ i1 }>, align 8
+; CHECK:      %[[a:.*]] = alloca i8, align 8
+; CHECK-NEXT: %[[b:.*]] = alloca i8, align 8
+
+  %b.i1 = bitcast <{ i1 }>* %b to i1*
+  store i1 %x, i1* %b.i1, align 8
+  %b.i8 = bitcast <{ i1 }>* %b to i8*
+  %foo = load i8, i8* %b.i8, align 1  ; i8 read of the slot just written as i1
+; CHECK-NEXT: %[[b_cast:.*]] = bitcast i8* %[[b]] to i1*
+; CHECK-NEXT: store i1 %x, i1* %[[b_cast]], align 8
+; CHECK-NEXT: {{.*}} = load i8, i8* %[[b]], align 8
+
+  %a.i8 = bitcast <{ i1 }>* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i1 false) nounwind  ; one-byte copy from %b to %a
+  %bar = load i8, i8* %a.i8, align 1
+  %a.i1 = getelementptr inbounds <{ i1 }>, <{ i1 }>* %a, i32 0, i32 0
+  %baz = load i1, i1* %a.i1, align 1
+; CHECK-NEXT: %[[copy:.*]] = load i8, i8* %[[b]], align 8
+; CHECK-NEXT: store i8 %[[copy]], i8* %[[a]], align 8
+; CHECK-NEXT: {{.*}} = load i8, i8* %[[a]], align 8
+; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1*
+; CHECK-NEXT: {{.*}} = load i1, i1* %[[a_cast]], align 8
+
+  ret void
+}
+
+define <3 x i8> @PR14572.1(i32 %x) {
+; Ensure that a split integer store which is wider than the type size of the
+; alloca (relying on the alloc size padding) doesn't trigger an assert.
+; CHECK: @PR14572.1
+
+entry:
+  %a = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+  %cast = bitcast <3 x i8>* %a to i32*
+  store i32 %x, i32* %cast, align 1  ; 32-bit store into the 24-bit vector type
+  %y = load <3 x i8>, <3 x i8>* %a, align 4
+  ret <3 x i8> %y
+; CHECK: ret <3 x i8>
+}
+
+define i32 @PR14572.2(<3 x i8> %x) {
+; Ensure that a split integer load which is wider than the type size of the
+; alloca (relying on the alloc size padding) doesn't trigger an assert.
+; CHECK: @PR14572.2
+
+entry:
+  %a = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+  store <3 x i8> %x, <3 x i8>* %a, align 1
+  %cast = bitcast <3 x i8>* %a to i32*
+  %y = load i32, i32* %cast, align 4  ; 32-bit load from the 24-bit vector type
+  ret i32 %y
+; CHECK: ret i32
+}
+
+define i32 @PR14601(i32 %x) {
+; Don't try to form a promotable integer alloca when there is a variable length
+; memory intrinsic.
+; CHECK-LABEL: @PR14601(
+
+entry:
+  %a = alloca i32
+; CHECK: alloca
+
+  %a.i8 = bitcast i32* %a to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i1 false)  ; length is the runtime value %x
+  %v = load i32, i32* %a
+  ret i32 %v
+}
+
+define void @PR15674(i8* %data, i8* %src, i32 %size) {
+; Arrange (via control flow) to have unmerged stores of a particular width to
+; an alloca where we incrementally store from the end of the array toward the
+; beginning of the array. Ensure that the final integer store, despite being
+; convertible to the integer type that we end up promoting this alloca toward,
+; doesn't get widened to a full alloca store.
+; CHECK-LABEL: @PR15674(
+
+entry:
+  %tmp = alloca [4 x i8], align 1
+; CHECK: alloca i32
+
+  switch i32 %size, label %end [
+    i32 4, label %bb4
+    i32 3, label %bb3
+    i32 2, label %bb2
+    i32 1, label %bb1
+  ]
+
+bb4:
+  %src.gep3 = getelementptr inbounds i8, i8* %src, i32 3
+  %src.3 = load i8, i8* %src.gep3
+  %tmp.gep3 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 3
+  store i8 %src.3, i8* %tmp.gep3
+; CHECK: store i8
+
+  br label %bb3
+
+bb3:
+  %src.gep2 = getelementptr inbounds i8, i8* %src, i32 2
+  %src.2 = load i8, i8* %src.gep2
+  %tmp.gep2 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 2
+  store i8 %src.2, i8* %tmp.gep2
+; CHECK: store i8
+
+  br label %bb2
+
+bb2:
+  %src.gep1 = getelementptr inbounds i8, i8* %src, i32 1
+  %src.1 = load i8, i8* %src.gep1
+  %tmp.gep1 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 1
+  store i8 %src.1, i8* %tmp.gep1
+; CHECK: store i8
+
+  br label %bb1
+
+bb1:
+  %src.gep0 = getelementptr inbounds i8, i8* %src, i32 0
+  %src.0 = load i8, i8* %src.gep0
+  %tmp.gep0 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 0
+  store i8 %src.0, i8* %tmp.gep0
+; CHECK: store i8
+
+  br label %end
+
+end:
+  %tmp.raw = bitcast [4 x i8]* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i1 false)
+  ret void
+; CHECK: ret void
+}
+
+define void @PR15805(i1 %a, i1 %b) {
+; CHECK-LABEL: @PR15805(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+  %c = alloca i64, align 8
+  %p.0.c = select i1 undef, i64* %c, i64* %c  ; both arms are the same alloca
+  %cond.in = select i1 undef, i64* %p.0.c, i64* %c  ; select fed by the previous select
+  %cond = load i64, i64* %cond.in, align 8  ; loaded value is never used
+  ret void
+}
+
+define void @PR15805.1(i1 %a, i1 %b) {
+; Same as the normal PR15805, but rigged to place the use before the def inside
+; of looping unreachable code. This helps ensure that we aren't sensitive to the
+; order in which the uses of the alloca are visited.
+;
+; CHECK-LABEL: @PR15805.1(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+  %c = alloca i64, align 8
+  br label %exit  ; %loop is never entered from here
+
+loop:
+  %cond.in = select i1 undef, i64* %c, i64* %p.0.c  ; uses %p.0.c before its definition on the next line
+  %p.0.c = select i1 undef, i64* %c, i64* %c
+  %cond = load i64, i64* %cond.in, align 8
+  br i1 undef, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @PR16651.1(i8* %a) {
+; This test case caused a crash due to the volatile memcpy in combination with
+; lowering to integer loads and stores of a width other than that of the original
+; memcpy.
+;
+; CHECK-LABEL: @PR16651.1(
+; CHECK: alloca i16
+; CHECK: alloca i8
+; CHECK: alloca i8
+; CHECK: unreachable
+
+entry:
+  %b = alloca i32, align 4
+  %b.cast = bitcast i32* %b to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %b.cast, i8* align 4 %a, i32 4, i1 true)  ; volatile (i1 true) 4-byte copy
+  %b.gep = getelementptr inbounds i8, i8* %b.cast, i32 2
+  load i8, i8* %b.gep, align 2  ; unnamed one-byte load at offset 2
+  unreachable
+}
+
+define void @PR16651.2() {
+; This test case caused a crash due to failing to promote given a select that
+; can't be speculated. It shouldn't be promoted, but we missed that fact when
+; analyzing whether we could form a vector promotion because that code didn't
+; bail on select instructions.
+;
+; CHECK-LABEL: @PR16651.2(
+; CHECK: alloca <2 x float>
+; CHECK: ret void
+
+entry:
+  %tv1 = alloca { <2 x float>, <2 x float> }, align 8
+  %0 = getelementptr { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1
+  store <2 x float> undef, <2 x float>* %0, align 8
+  %1 = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1, i64 0
+  %cond105.in.i.i = select i1 undef, float* null, float* %1  ; one arm is a null pointer
+  %cond105.i.i = load float, float* %cond105.in.i.i, align 8
+  ret void
+}
+
+define void @test23(i32 %x) {
+; Stores %x into an i32 alloca and then memcpys those 4 bytes to the address one
+; i32 past the alloca (%gep1), i.e. the memcpy destination lies entirely beyond
+; the end of the allocation. The checks expect no alloca to remain.
+;
+; CHECK-LABEL: @test23(
+; CHECK-NOT: alloca
+; CHECK: ret void
+entry:
+  %a = alloca i32, align 4
+  store i32 %x, i32* %a, align 4
+; %gep1 is element 1 (one past the end); %gep0 is the alloca itself.
+  %gep1 = getelementptr inbounds i32, i32* %a, i32 1
+  %gep0 = getelementptr inbounds i32, i32* %a, i32 0
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cast0 = bitcast i32* %gep0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i1 false)
+  ret void
+}
+
+define void @PR18615() {
+; The source of the 1-byte memcpy is %gep, which is index -1 relative to the
+; single-byte alloca %f (one byte before its start); the destination is undef.
+; The checks expect no alloca to remain.
+;
+; CHECK-LABEL: @PR18615(
+; CHECK-NOT: alloca
+; CHECK: ret void
+entry:
+  %f = alloca i8
+  %gep = getelementptr i8, i8* %f, i64 -1
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %gep, i32 1, i1 false)
+  ret void
+}
+
+define void @test24(i8* %src, i8* %dst) {
+; Two volatile 8-byte memcpys, one into and one out of an i64 alloca, each
+; carrying its own !tbaa tag (!0 and !3). The checks expect the alloca to stay
+; and each memcpy to become a volatile i64 load/store pair that keeps the tag of
+; the memcpy it came from, using align 16 on the alloca side and align 1 on the
+; %src/%dst side.
+;
+; [[TAG_0]] and [[TAG_3]] are used below without being defined here, so they are
+; presumably captured by an earlier check in this file (not visible in this
+; excerpt).
+;
+; CHECK-LABEL: @test24(
+; CHECK: alloca i64, align 16
+; CHECK: load volatile i64, i64* %{{[^,]*}}, align 1, !tbaa [[TAG_0]]
+; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 16, !tbaa [[TAG_0]]
+; CHECK: load volatile i64, i64* %{{[^,]*}}, align 16, !tbaa [[TAG_3]]
+; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 1, !tbaa [[TAG_3]]
+
+entry:
+  %a = alloca i64, align 16
+  %ptr = bitcast i64* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i1 true), !tbaa !0
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i1 true), !tbaa !3
+  ret void
+}
+
+define float @test25() {
+; Check that we split up stores in order to promote the smaller SSA values. These
+; types of patterns can arise because LLVM maps small memcpy's to integer loads
+; and stores. If we get a memcpy of an aggregate (such as C and C++ frontends
+; would produce, but so might any language frontend), this will in many cases
+; turn into an integer load and store. SROA needs to be extremely powerful to
+; correctly handle these cases and form splittable and promotable SSA values.
+;
+; 1065353216 in the checks is 0x3F800000, the IEEE-754 bit pattern of float 1.0.
+;
+; CHECK-LABEL: @test25(
+; CHECK-NOT: alloca
+; CHECK: %[[F1:.*]] = bitcast i32 0 to float
+; CHECK: %[[F2:.*]] = bitcast i32 1065353216 to float
+; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]]
+; CHECK: ret float %[[SUM]]
+
+entry:
+  %a = alloca i64
+  %b = alloca i64
+  %a.cast = bitcast i64* %a to [2 x float]*
+  %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0
+  %a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1
+  %b.cast = bitcast i64* %b to [2 x float]*
+  %b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0
+  %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1
+; Write %a as two floats, copy it to %b as one i64, then read %b as two floats.
+  store float 0.0, float* %a.gep1
+  store float 1.0, float* %a.gep2
+  %v = load i64, i64* %a
+  store i64 %v, i64* %b
+  %f1 = load float, float* %b.gep1
+  %f2 = load float, float* %b.gep2
+  %ret = fadd float %f1, %f2
+  ret float %ret
+}
+
+; External [2 x float] globals: @test26 loads from @complex1 and stores to
+; @complex2.
+@complex1 = external global [2 x float]
+@complex2 = external global [2 x float]
+
+define void @test26() {
+; Test a case of splitting up loads and stores against globals.
+;
+; The i64 load from @complex1 and the i64 store to @complex2 are each expected
+; to turn into two i32 accesses, with the fadd performed on the bitcast halves.
+;
+; CHECK-LABEL: @test26(
+; CHECK-NOT: alloca
+; CHECK: %[[L1:.*]] = load i32, i32* bitcast
+; CHECK: %[[L2:.*]] = load i32, i32* bitcast
+; CHECK: %[[F1:.*]] = bitcast i32 %[[L1]] to float
+; CHECK: %[[F2:.*]] = bitcast i32 %[[L2]] to float
+; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]]
+; CHECK: %[[C1:.*]] = bitcast float %[[SUM]] to i32
+; CHECK: %[[C2:.*]] = bitcast float %[[SUM]] to i32
+; CHECK: store i32 %[[C1]], i32* bitcast
+; CHECK: store i32 %[[C2]], i32* bitcast
+; CHECK: ret void
+
+entry:
+  %a = alloca i64
+  %a.cast = bitcast i64* %a to [2 x float]*
+  %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0
+  %a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1
+; Fill %a with one i64 read from @complex1, then view it as two floats.
+  %v1 = load i64, i64* bitcast ([2 x float]* @complex1 to i64*)
+  store i64 %v1, i64* %a
+  %f1 = load float, float* %a.gep1
+  %f2 = load float, float* %a.gep2
+  %sum = fadd float %f1, %f2
+; Both halves are overwritten with the sum before %a is copied out as one i64.
+  store float %sum, float* %a.gep1
+  store float %sum, float* %a.gep2
+  %v2 = load i64, i64* %a
+  store i64 %v2, i64* bitcast ([2 x float]* @complex2 to i64*)
+  ret void
+}
+
+define float @test27() {
+; Another, more complex case of splittable i64 loads and stores. This example
+; is a particularly challenging one because the load and store both point into
+; the alloca SROA is processing, and they overlap but at an offset.
+;
+; CHECK-LABEL: @test27(
+; CHECK-NOT: alloca
+; CHECK: %[[F1:.*]] = bitcast i32 0 to float
+; CHECK: %[[F2:.*]] = bitcast i32 1065353216 to float
+; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]]
+; CHECK: ret float %[[SUM]]
+
+entry:
+  %a = alloca [12 x i8]
+; Three pointers into the 12-byte buffer, at byte offsets 0, 4 and 8.
+  %gep1 = getelementptr [12 x i8], [12 x i8]* %a, i32 0, i32 0
+  %gep2 = getelementptr [12 x i8], [12 x i8]* %a, i32 0, i32 4
+  %gep3 = getelementptr [12 x i8], [12 x i8]* %a, i32 0, i32 8
+  %iptr1 = bitcast i8* %gep1 to i64*
+  %iptr2 = bitcast i8* %gep2 to i64*
+  %fptr1 = bitcast i8* %gep1 to float*
+  %fptr2 = bitcast i8* %gep2 to float*
+  %fptr3 = bitcast i8* %gep3 to float*
+  store float 0.0, float* %fptr1
+  store float 1.0, float* %fptr2
+; The i64 load reads bytes [0,8) and the i64 store writes bytes [4,12), so the
+; two accesses overlap in bytes [4,8).
+  %v = load i64, i64* %iptr1
+  store i64 %v, i64* %iptr2
+  %f1 = load float, float* %fptr2
+  %f2 = load float, float* %fptr3
+  %ret = fadd float %f1, %f2
+  ret float %ret
+}
+
+define i32 @PR22093() {
+; Test that we don't try to pre-split a splittable store of a splittable but
+; not pre-splittable load over the same alloca. We "handle" this case when the
+; load is unsplittable but unrelated to this alloca by just generating extra
+; loads without touching the original, but when the original load was out of
+; this alloca we need to handle it specially to ensure the splits line up
+; properly for rewriting.
+;
+; The checks expect exactly one alloca (an i16) and the volatile i16 store to
+; remain.
+;
+; CHECK-LABEL: @PR22093(
+; CHECK-NOT: alloca
+; CHECK: alloca i16
+; CHECK-NOT: alloca
+; CHECK: store volatile i16
+
+entry:
+  %a = alloca i32
+  %a.cast = bitcast i32* %a to i16*
+; The volatile store covers only the first two bytes of the 4-byte alloca ...
+  store volatile i16 42, i16* %a.cast
+; ... while the load and the store of the loaded value cover all four.
+  %load = load i32, i32* %a
+  store i32 %load, i32* %a
+  ret i32 %load
+}
+
+define void @PR22093.2() {
+; Another way that we end up being unable to split a particular set of loads
+; and stores can even have ordering importance. Here we have a load which is
+; pre-splittable by itself, and the first store is also compatible. But the
+; second store of the load makes the load unsplittable because of a mismatch of
+; splits. Because this makes the load unsplittable, we also have to go back and
+; remove the first store from the presplit candidates as its load won't be
+; presplit.
+;
+; The checks expect exactly two allocas (an i16 immediately followed by an i8)
+; and both volatile stores to remain.
+;
+; CHECK-LABEL: @PR22093.2(
+; CHECK-NOT: alloca
+; CHECK: alloca i16
+; CHECK-NEXT: alloca i8
+; CHECK-NOT: alloca
+; CHECK: store volatile i16
+; CHECK: store volatile i8
+
+entry:
+  %a = alloca i64
+  %a.cast1 = bitcast i64* %a to i32*
+  %a.cast2 = bitcast i64* %a to i16*
+  store volatile i16 42, i16* %a.cast2
+  %load = load i32, i32* %a.cast1
+; First store of %load: back to the same four bytes it was loaded from.
+  store i32 %load, i32* %a.cast1
+; %a.gep1 is the second i32 of the alloca (byte offset 4).
+  %a.gep1 = getelementptr i32, i32* %a.cast1, i32 1
+  %a.cast3 = bitcast i32* %a.gep1 to i8*
+  store volatile i8 13, i8* %a.cast3
+; Second store of %load, this time over the bytes that start with the volatile
+; i8 above.
+  store i32 %load, i32* %a.gep1
+  ret void
+}
+
+define void @PR23737() {
+; The alloca is accessed only by a seq_cst atomic volatile store and a seq_cst
+; atomic volatile load. The checks require both accesses to still be present,
+; still atomic, volatile and seq_cst, after the pass.
+;
+; CHECK-LABEL: @PR23737(
+; CHECK: store atomic volatile {{.*}} seq_cst
+; CHECK: load atomic volatile {{.*}} seq_cst
+entry:
+  %ptr = alloca i64, align 8
+  store atomic volatile i64 0, i64* %ptr seq_cst, align 8
+  %load = load atomic volatile i64, i64* %ptr seq_cst, align 8
+  ret void
+}
+
+define i16 @PR24463() {
+; Ensure we can handle a very interesting case where there is an integer-based
+; rewrite of the uses of the alloca, but where one of the integers in that is
+; a sub-integer that requires extraction *and* extends past the end of the
+; alloca. SROA can split the alloca to avoid shift or trunc.
+;
+; CHECK-LABEL: @PR24463(
+; CHECK-NOT: alloca
+; CHECK-NOT: trunc
+; CHECK-NOT: lshr
+; CHECK: %[[ZEXT:.*]] = zext i8 {{.*}} to i16
+; CHECK: ret i16 %[[ZEXT]]
+entry:
+  %alloca = alloca [3 x i8]
+; i16 store at byte offset 1: covers bytes 1 and 2 of the 3-byte array.
+  %gep1 = getelementptr inbounds [3 x i8], [3 x i8]* %alloca, i64 0, i64 1
+  %bc1 = bitcast i8* %gep1 to i16*
+  store i16 0, i16* %bc1
+; i16 load at byte offset 2: its second byte lies past the end of the array.
+  %gep2 = getelementptr inbounds [3 x i8], [3 x i8]* %alloca, i64 0, i64 2
+  %bc2 = bitcast i8* %gep2 to i16*
+  %load = load i16, i16* %bc2
+  ret i16 %load
+}
+
+%struct.STest = type { %struct.SPos, %struct.SPos }
+%struct.SPos = type { float, float }
+
+define void @PR25873(%struct.STest* %outData) {
+; Writes floats 123.0 and 456.0 into the first SPos of a local STest, copies
+; that SPos onto the second one as a single i64, and finally memcpys all 16
+; bytes to %outData.
+;
+; In the checks, 1123418112 (0x42F60000) and 1139015680 (0x43E40000) are the
+; IEEE-754 bit patterns of 123.0 and 456.0. The expected output stores the two
+; values as i32 and rebuilds the i64 copy from them with zext/shl/and/or.
+;
+; CHECK-LABEL: @PR25873(
+; CHECK: store i32 1123418112
+; CHECK: store i32 1139015680
+; CHECK: %[[HIZEXT:.*]] = zext i32 1139015680 to i64
+; CHECK: %[[HISHL:.*]] = shl i64 %[[HIZEXT]], 32
+; CHECK: %[[HIMASK:.*]] = and i64 undef, 4294967295
+; CHECK: %[[HIINSERT:.*]] = or i64 %[[HIMASK]], %[[HISHL]]
+; CHECK: %[[LOZEXT:.*]] = zext i32 1123418112 to i64
+; CHECK: %[[LOMASK:.*]] = and i64 %[[HIINSERT]], -4294967296
+; CHECK: %[[LOINSERT:.*]] = or i64 %[[LOMASK]], %[[LOZEXT]]
+; CHECK: store i64 %[[LOINSERT]]
+entry:
+  %tmpData = alloca %struct.STest, align 8
+  %0 = bitcast %struct.STest* %tmpData to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %0)
+  %x = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 0, i32 0
+  store float 1.230000e+02, float* %x, align 8
+  %y = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 0, i32 1
+  store float 4.560000e+02, float* %y, align 4
+; Copy the first SPos (%1, start of %tmpData) onto the second one (%2) as one i64.
+  %m_posB = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 1
+  %1 = bitcast %struct.STest* %tmpData to i64*
+  %2 = bitcast %struct.SPos* %m_posB to i64*
+  %3 = load i64, i64* %1, align 8
+  store i64 %3, i64* %2, align 8
+  %4 = bitcast %struct.STest* %outData to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %4, i8* align 4 %0, i64 16, i1 false)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* %0)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define void @PR27999() unnamed_addr {
+; The alloca is used only by lifetime markers: lifetime.start covers all 16
+; bytes, while lifetime.end is given 8 bytes starting at element 1. The checks
+; expect the block to be reduced to a bare `ret void`.
+;
+; CHECK-LABEL: @PR27999(
+; CHECK: entry-block:
+; CHECK-NEXT: ret void
+entry-block:
+  %0 = alloca [2 x i64], align 8
+  %1 = bitcast [2 x i64]* %0 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %1)
+  %2 = getelementptr inbounds [2 x i64], [2 x i64]* %0, i32 0, i32 1
+  %3 = bitcast i64* %2 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %3)
+  ret void
+}
+
+define void @PR29139() {
+; An i32 alloca with align 1 whose only uses are a load with an unused result
+; and a lifetime.end that covers just 2 bytes. The checks expect the block to
+; be reduced to a bare `ret void`.
+;
+; CHECK-LABEL: @PR29139(
+; CHECK: bb1:
+; CHECK-NEXT: ret void
+bb1:
+  %e.7.sroa.6.i = alloca i32, align 1
+  %e.7.sroa.6.0.load81.i = load i32, i32* %e.7.sroa.6.i, align 1
+  %0 = bitcast i32* %e.7.sroa.6.i to i8*
+  call void @llvm.lifetime.end.p0i8(i64 2, i8* %0)
+  ret void
+}
+
+; PR35657 reports assertion failure with this code
+define void @PR35657(i64 %v) {
+; An i64 is stored through a pointer to an i48 alloca; the value is then read
+; back as an i16 at byte offset 0 and as an i48 at byte offset 2, and both
+; results are passed to external callees so they stay live. The checks only
+; require that both calls and the return are still emitted.
+;
+; CHECK-LABEL: @PR35657(
+; CHECK: call void @callee16(i16 %{{.*}})
+; CHECK: call void @callee48(i48 %{{.*}})
+; CHECK: ret void
+entry:
+  %a48 = alloca i48
+  %a48.cast64 = bitcast i48* %a48 to i64*
+  store i64 %v, i64* %a48.cast64
+  %a48.cast16 = bitcast i48* %a48 to i16*
+  %b0_15 = load i16, i16* %a48.cast16
+  %a48.cast8 = bitcast i48* %a48 to i8*
+  %a48_offset2 = getelementptr inbounds i8, i8* %a48.cast8, i64 2
+  %a48_offset2.cast48 = bitcast i8* %a48_offset2 to i48*
+  %b16_63 = load i48, i48* %a48_offset2.cast48, align 2
+  call void @callee16(i16 %b0_15)
+  call void @callee48(i48 %b16_63)
+  ret void
+}
+
+declare void @callee16(i16 %a)
+declare void @callee48(i48 %a)
+
+define void @test28(i64 %v) #0 {
+; SROA should split the first i64 store to avoid additional and/or instructions
+; when storing into i32 fields
+
+; CHECK-LABEL: @test28(
+; CHECK-NOT: alloca
+; CHECK-NOT: and
+; CHECK-NOT: or
+; CHECK:      %[[shift:.*]] = lshr i64 %v, 32
+; CHECK-NEXT: %{{.*}} = trunc i64 %[[shift]] to i32
+; CHECK-NEXT: ret void
+
+entry:
+  %t = alloca { i64, i32, i32 }
+
+; The i64 store starts at field 1, so it also covers field 2 (the two i32
+; fields are adjacent).
+  %b = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 1
+  %0 = bitcast i32* %b to i64*
+  store i64 %v, i64* %0
+
+; Field 1 is then read back as an i32 and copied over field 2.
+  %1 = load i32, i32* %b
+  %c = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 2
+  store i32 %1, i32* %c
+  ret void
+}
+
+declare void @llvm.lifetime.start.isVoid.i64.p0i8(i64, [10 x float]* nocapture)
+declare void @llvm.lifetime.end.isVoid.i64.p0i8(i64, [10 x float]* nocapture)
+; Global array that @test29 stores into.
+@array = dso_local global [10 x float] undef, align 4
+
+define void @test29(i32 %num, i32 %tid) {
+; %ra is never written. Its first four bytes are loaded once as an i32 in %bb1,
+; and that value is stored to @array[%tid] on every iteration of a countdown
+; loop whenever %tid is non-zero. The alloca is bracketed by calls to the
+; lifetime.start/end.isVoid declarations, which take a [10 x float]* directly.
+; The checks expect that no `alloca [10 x float]` remains.
+;
+; CHECK-LABEL: @test29(
+; CHECK-NOT: alloca [10 x float]
+; CHECK: ret void
+
+entry:
+  %ra = alloca [10 x float], align 4
+  call void @llvm.lifetime.start.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra)
+
+; Skip the loop entirely when %num <= 0.
+  %cmp1 = icmp sgt i32 %num, 0
+  br i1 %cmp1, label %bb1, label %bb7
+
+; Loop preheader: everything computed here is loop-invariant.
+bb1:
+  %tobool = icmp eq i32 %tid, 0
+  %conv.i = zext i32 %tid to i64
+  %0 = bitcast [10 x float]* %ra to i32*
+  %1 = load i32, i32* %0, align 4
+  %arrayidx5 = getelementptr inbounds [10 x float], [10 x float]* @array, i64 0, i64 %conv.i
+  %2 = bitcast float* %arrayidx5 to i32*
+  br label %bb2
+
+; Loop header: %i.02 counts down from %num.
+bb2:
+  %i.02 = phi i32 [ %num, %bb1 ], [ %sub, %bb5 ]
+  br i1 %tobool, label %bb3, label %bb4
+
+; %tid == 0: nothing to do this iteration.
+bb3:
+  br label %bb5
+
+; %tid != 0: store the value loaded from %ra into @array[%tid].
+bb4:
+  store i32 %1, i32* %2, align 4
+  br label %bb5
+
+; Latch: decrement and continue while the counter is still positive.
+bb5:
+  %sub = add i32 %i.02, -1
+  %cmp = icmp sgt i32 %sub, 0
+  br i1 %cmp, label %bb2, label %bb6
+
+bb6:
+  br label %bb7
+
+bb7:
+  call void @llvm.lifetime.end.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra)
+  ret void
+}
+
+!0 = !{!1, !1, i64 0, i64 1}
+!1 = !{!2, i64 1, !"type_0"}
+!2 = !{!"root"}
+!3 = !{!4, !4, i64 0, i64 1}
+!4 = !{!2, i64 1, !"type_3"}
+!5 = !{!6, !6, i64 0, i64 1}
+!6 = !{!2, i64 1, !"type_5"}
+!7 = !{!8, !8, i64 0, i64 1}
+!8 = !{!2, i64 1, !"type_7"}
+!9 = !{!10, !10, i64 0, i64 1}
+!10 = !{!2, i64 1, !"type_9"}
+!11 = !{!12, !12, i64 0, i64 1}
+!12 = !{!2, i64 1, !"type_11"}
+!13 = !{!14, !14, i64 0, i64 1}
+!14 = !{!2, i64 1, !"type_13"}
+!15 = !{!16, !16, i64 0, i64 1}
+!16 = !{!2, i64 1, !"type_15"}
+!17 = !{!18, !18, i64 0, i64 1}
+!18 = !{!2, i64 1, !"type_17"}
+!19 = !{!20, !20, i64 0, i64 1}
+!20 = !{!2, i64 1, !"type_19"}
+!21 = !{!22, !22, i64 0, i64 1}
+!22 = !{!2, i64 1, !"type_21"}
+!23 = !{!24, !24, i64 0, i64 1}
+!24 = !{!2, i64 1, !"type_23"}
+!25 = !{!26, !26, i64 0, i64 1}
+!26 = !{!2, i64 1, !"type_25"}
+!27 = !{!28, !28, i64 0, i64 1}
+!28 = !{!2, i64 1, !"type_27"}
+!29 = !{!30, !30, i64 0, i64 1}
+!30 = !{!2, i64 1, !"type_29"}
+!31 = !{!32, !32, i64 0, i64 1}
+!32 = !{!2, i64 1, !"type_31"}
+!33 = !{!34, !34, i64 0, i64 1}
+!34 = !{!2, i64 1, !"type_33"}
+!35 = !{!36, !36, i64 0, i64 1}
+!36 = !{!2, i64 1, !"type_35"}
+!37 = !{!38, !38, i64 0, i64 1}
+!38 = !{!2, i64 1, !"type_37"}
+!39 = !{!40, !40, i64 0, i64 1}
+!40 = !{!2, i64 1, !"type_39"}
+!41 = !{!42, !42, i64 0, i64 1}
+!42 = !{!2, i64 1, !"type_41"}
+!43 = !{!44, !44, i64 0, i64 1}
+!44 = !{!2, i64 1, !"type_43"}
+!45 = !{!46, !46, i64 0, i64 1}
+!46 = !{!2, i64 1, !"type_45"}
+!47 = !{!48, !48, i64 0, i64 1}
+!48 = !{!2, i64 1, !"type_47"}
+!49 = !{!50, !50, i64 0, i64 1}
+!50 = !{!2, i64 1, !"type_49"}
+!51 = !{!52, !52, i64 0, i64 1}
+!52 = !{!2, i64 1, !"type_51"}
+!53 = !{!54, !54, i64 0, i64 1}
+!54 = !{!2, i64 1, !"type_53"}
+!55 = !{!56, !56, i64 0, i64 1}
+!56 = !{!2, i64 1, !"type_55"}
+!57 = !{!58, !58, i64 0, i64 1}
+!58 = !{!2, i64 1, !"type_57"}
+!59 = !{!60, !60, i64 0, i64 1}
+!60 = !{!2, i64 1, !"type_59"}
+
+; CHECK-DAG: [[TYPE_0:!.*]] = !{{{.*}}, !"type_0"}
+; CHECK-DAG: [[TAG_0]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_3:!.*]] = !{{{.*}}, !"type_3"}
+; CHECK-DAG: [[TAG_3]] = !{[[TYPE_3]], [[TYPE_3]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_5:!.*]] = !{{{.*}}, !"type_5"}
+; CHECK-DAG: [[TAG_5]] = !{[[TYPE_5]], [[TYPE_5]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_7:!.*]] = !{{{.*}}, !"type_7"}
+; CHECK-DAG: [[TAG_7]] = !{[[TYPE_7]], [[TYPE_7]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_9:!.*]] = !{{{.*}}, !"type_9"}
+; CHECK-DAG: [[TAG_9]] = !{[[TYPE_9]], [[TYPE_9]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_11:!.*]] = !{{{.*}}, !"type_11"}
+; CHECK-DAG: [[TAG_11]] = !{[[TYPE_11]], [[TYPE_11]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_13:!.*]] = !{{{.*}}, !"type_13"}
+; CHECK-DAG: [[TAG_13]] = !{[[TYPE_13]], [[TYPE_13]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_15:!.*]] = !{{{.*}}, !"type_15"}
+; CHECK-DAG: [[TAG_15]] = !{[[TYPE_15]], [[TYPE_15]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_17:!.*]] = !{{{.*}}, !"type_17"}
+; CHECK-DAG: [[TAG_17]] = !{[[TYPE_17]], [[TYPE_17]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_19:!.*]] = !{{{.*}}, !"type_19"}
+; CHECK-DAG: [[TAG_19]] = !{[[TYPE_19]], [[TYPE_19]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_21:!.*]] = !{{{.*}}, !"type_21"}
+; CHECK-DAG: [[TAG_21]] = !{[[TYPE_21]], [[TYPE_21]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_23:!.*]] = !{{{.*}}, !"type_23"}
+; CHECK-DAG: [[TAG_23]] = !{[[TYPE_23]], [[TYPE_23]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_25:!.*]] = !{{{.*}}, !"type_25"}
+; CHECK-DAG: [[TAG_25]] = !{[[TYPE_25]], [[TYPE_25]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_27:!.*]] = !{{{.*}}, !"type_27"}
+; CHECK-DAG: [[TAG_27]] = !{[[TYPE_27]], [[TYPE_27]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_29:!.*]] = !{{{.*}}, !"type_29"}
+; CHECK-DAG: [[TAG_29]] = !{[[TYPE_29]], [[TYPE_29]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_31:!.*]] = !{{{.*}}, !"type_31"}
+; CHECK-DAG: [[TAG_31]] = !{[[TYPE_31]], [[TYPE_31]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_33:!.*]] = !{{{.*}}, !"type_33"}
+; CHECK-DAG: [[TAG_33]] = !{[[TYPE_33]], [[TYPE_33]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_35:!.*]] = !{{{.*}}, !"type_35"}
+; CHECK-DAG: [[TAG_35]] = !{[[TYPE_35]], [[TYPE_35]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_37:!.*]] = !{{{.*}}, !"type_37"}
+; CHECK-DAG: [[TAG_37]] = !{[[TYPE_37]], [[TYPE_37]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_39:!.*]] = !{{{.*}}, !"type_39"}
+; CHECK-DAG: [[TAG_39]] = !{[[TYPE_39]], [[TYPE_39]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_41:!.*]] = !{{{.*}}, !"type_41"}
+; CHECK-DAG: [[TAG_41]] = !{[[TYPE_41]], [[TYPE_41]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_43:!.*]] = !{{{.*}}, !"type_43"}
+; CHECK-DAG: [[TAG_43]] = !{[[TYPE_43]], [[TYPE_43]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_45:!.*]] = !{{{.*}}, !"type_45"}
+; CHECK-DAG: [[TAG_45]] = !{[[TYPE_45]], [[TYPE_45]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_47:!.*]] = !{{{.*}}, !"type_47"}
+; CHECK-DAG: [[TAG_47]] = !{[[TYPE_47]], [[TYPE_47]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_49:!.*]] = !{{{.*}}, !"type_49"}
+; CHECK-DAG: [[TAG_49]] = !{[[TYPE_49]], [[TYPE_49]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_51:!.*]] = !{{{.*}}, !"type_51"}
+; CHECK-DAG: [[TAG_51]] = !{[[TYPE_51]], [[TYPE_51]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_53:!.*]] = !{{{.*}}, !"type_53"}
+; CHECK-DAG: [[TAG_53]] = !{[[TYPE_53]], [[TYPE_53]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_55:!.*]] = !{{{.*}}, !"type_55"}
+; CHECK-DAG: [[TAG_55]] = !{[[TYPE_55]], [[TYPE_55]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_57:!.*]] = !{{{.*}}, !"type_57"}
+; CHECK-DAG: [[TAG_57]] = !{[[TYPE_57]], [[TYPE_57]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_59:!.*]] = !{{{.*}}, !"type_59"}
+; CHECK-DAG: [[TAG_59]] = !{[[TYPE_59]], [[TYPE_59]], i64 0, i64 1}

Added: llvm/trunk/test/Transforms/SROA/big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/big-endian.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/big-endian.ll (added)
+++ llvm/trunk/test/Transforms/SROA/big-endian.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,252 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define i8 @test1() {
+; We fully promote these to the i24 load or store size, resulting in just masks
+; and other operations that instcombine will fold, but no alloca. Note this is
+; the same as test12 in basictest.ll, but here we assert big-endian byte
+; ordering.
+;
+; CHECK-LABEL: @test1(
+
+entry:
+  %a = alloca [3 x i8]
+  %b = alloca [3 x i8]
+; CHECK-NOT: alloca
+
+; Write %a one byte at a time, then read all three bytes back as a single i24.
+  %a0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0
+  store i8 0, i8* %a0ptr
+  %a1ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 1
+  store i8 0, i8* %a1ptr
+  %a2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2
+  store i8 0, i8* %a2ptr
+  %aiptr = bitcast [3 x i8]* %a to i24*
+  %ai = load i24, i24* %aiptr
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256
+; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]]
+; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
+; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
+; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift0:.*]] = shl i24 %[[ext0]], 16
+; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], 65535
+; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[shift0]]
+
+; Store the i24 into %b and read it back one byte at a time. The checks below
+; expect byte 0 (%b0) to come from the top bits (lshr 16) and byte 2 (%b2) from
+; the low bits (plain trunc).
+  %biptr = bitcast [3 x i8]* %b to i24*
+  store i24 %ai, i24* %biptr
+  %b0ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 0
+  %b0 = load i8, i8* %b0ptr
+  %b1ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 1
+  %b1 = load i8, i8* %b1ptr
+  %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2
+  %b2 = load i8, i8* %b2ptr
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK:      %[[shift0:.*]] = lshr i24 %[[insert0]], 16
+; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
+; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
+; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[insert0]] to i8
+
+  %bsum0 = add i8 %b0, %b1
+  %bsum1 = add i8 %bsum0, %b2
+  ret i8 %bsum1
+; CHECK:      %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
+; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
+; CHECK-NEXT: ret i8 %[[sum1]]
+}
+
+define i64 @test2() {
+; Test for various mixed sizes of integer loads and stores all getting
+; promoted.
+;
+; CHECK-LABEL: @test2(
+
+entry:
+  %a = alloca [7 x i8]
+; CHECK-NOT: alloca
+
+  %a0ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 0
+  %a1ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 1
+  %a2ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 2
+  %a3ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 3
+
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+; Overlapping stores of different widths: an i16 at bytes [0,2), an i8 at byte
+; 2, an i24 at bytes [3,6), and finally an i40 at bytes [2,7) that overwrites
+; the previous two.
+  %a0i16ptr = bitcast i8* %a0ptr to i16*
+  store i16 1, i16* %a0i16ptr
+
+  store i8 1, i8* %a2ptr
+
+  %a3i24ptr = bitcast i8* %a3ptr to i24*
+  store i24 1, i24* %a3i24ptr
+
+  %a2i40ptr = bitcast i8* %a2ptr to i40*
+  store i40 1, i40* %a2i40ptr
+
+; The alloca is split into multiple slices.
+; Here, i8 1 is for %a[6]
+; CHECK: %[[ext1:.*]] = zext i8 1 to i40
+; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, -256
+; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], %[[ext1]]
+
+; Here, i24 0 is for %a[3] to %a[5]
+; CHECK-NEXT: %[[ext2:.*]] = zext i24 0 to i40
+; CHECK-NEXT: %[[shift2:.*]] = shl i40 %[[ext2]], 8
+; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
+; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], %[[shift2]]
+
+; Here, i8 0 is for %a[2]
+; CHECK-NEXT: %[[ext3:.*]] = zext i8 0 to i40
+; CHECK-NEXT: %[[shift3:.*]] = shl i40 %[[ext3]], 32
+; CHECK-NEXT: %[[mask3:.*]] = and i40 %[[insert2]], 4294967295
+; CHECK-NEXT: %[[insert3:.*]] = or i40 %[[mask3]], %[[shift3]]
+
+; CHECK-NEXT: %[[ext4:.*]] = zext i40 %[[insert3]] to i56
+; CHECK-NEXT: %[[mask4:.*]] = and i56 undef, -1099511627776
+; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[ext4]]
+
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+; Read all seven bytes back as one i56 and widen it for the return value.
+  %aiptr = bitcast [7 x i8]* %a to i56*
+  %ai = load i56, i56* %aiptr
+  %ret = zext i56 %ai to i64
+  ret i64 %ret
+; Here, i16 1 is for %a[0] to %a[1]
+; CHECK-NEXT: %[[ext5:.*]] = zext i16 1 to i56
+; CHECK-NEXT: %[[shift5:.*]] = shl i56 %[[ext5]], 40
+; CHECK-NEXT: %[[mask5:.*]] = and i56 %[[insert4]], 1099511627775
+; CHECK-NEXT: %[[insert5:.*]] = or i56 %[[mask5]], %[[shift5]]
+; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert5]] to i64
+; CHECK-NEXT: ret i64 %[[ret]]
+}
+
+define i64 @PR14132(i1 %flag) {
+; CHECK-LABEL: @PR14132(
+; Here we form a PHI-node by promoting the pointer alloca first, and then in
+; order to promote the other two allocas, we speculate the load of the
+; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
+; alloca. While this is a bit dubious, we were asserting on trying to
+; rewrite it. The trick is that the code using the value may carefully take
+; steps to only use the not-undef bits, and so we need to at least loosely
+; support this. This test is particularly interesting because how we handle
+; a load of an i64 from an i8 alloca is dependent on endianness.
+;
+; With this file's big-endian data layout the checks expect the i8 to be
+; shifted left by 56, i.e. into the most significant byte of the i64.
+entry:
+  %a = alloca i64, align 8
+  %b = alloca i8, align 8
+  %ptr = alloca i64*, align 8
+; CHECK-NOT: alloca
+
+; %ptr starts out pointing at %a ...
+  %ptr.cast = bitcast i64** %ptr to i8**
+  store i64 0, i64* %a
+  store i8 1, i8* %b
+  store i64* %a, i64** %ptr
+  br i1 %flag, label %if.then, label %if.end
+
+; ... and is redirected to the one-byte %b when %flag is set.
+if.then:
+  store i8* %b, i8** %ptr.cast
+  br label %if.end
+; CHECK-NOT: store
+; CHECK: %[[ext:.*]] = zext i8 1 to i64
+; CHECK: %[[shift:.*]] = shl i64 %[[ext]], 56
+
+if.end:
+  %tmp = load i64*, i64** %ptr
+  %result = load i64, i64* %tmp
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = phi i64 [ %[[shift]], %if.then ], [ 0, %entry ]
+
+  ret i64 %result
+; CHECK-NEXT: ret i64 %[[result]]
+}
+
+declare void @f(i64 %x, i32 %y)
+
+define void @test3() {
+; CHECK-LABEL: @test3(
+;
+; This is a test that specifically exercises the big-endian lowering because it
+; ends up splitting a 64-bit integer into two smaller integers and has a number
+; of tricky aspects (the i24 type) that make that hard. Historically, SROA
+; would miscompile this by either dropping a most significant byte or least
+; significant byte due to shrinking the [4,8) slice to an i24, or by failing to
+; move the bytes around correctly.
+;
+; The magical number 34494054408 is used because it has bits set in various
+; bytes so that it is clear if those bytes fail to be propagated. In hex it is
+; 0x0000000808018008: the upper 32 bits are 8 and the lower 32 bits are
+; 134316040 (0x08018008), the two i32 constants that appear in the checks.
+;
+; If you're debugging this, rather than using the direct magical numbers, run
+; the IR through '-sroa -instcombine'. With '-instcombine' these will be
+; constant folded, and if the i64 doesn't round-trip correctly, you've found
+; a bug!
+;
+entry:
+  %a = alloca { i32, i24 }, align 4
+; CHECK-NOT: alloca
+
+; Store the constant as an i64 over the whole struct, then read it back both
+; as an i64 and as the i32 at offset 0.
+  %tmp0 = bitcast { i32, i24 }* %a to i64*
+  store i64 34494054408, i64* %tmp0
+  %tmp1 = load i64, i64* %tmp0, align 4
+  %tmp2 = bitcast { i32, i24 }* %a to i32*
+  %tmp3 = load i32, i32* %tmp2, align 4
+; CHECK: %[[HI_EXT:.*]] = zext i32 134316040 to i64
+; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
+; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
+; CHECK: %[[LO_EXT:.*]] = zext i32 8 to i64
+; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
+; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
+; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
+
+  call void @f(i64 %tmp1, i32 %tmp3)
+; CHECK: call void @f(i64 %[[LO_MERGE]], i32 8)
+  ret void
+; CHECK: ret void
+}
+
+define void @test4() {
+; CHECK-LABEL: @test4(
+;
+; Much like @test3, this is specifically testing big-endian management of data.
+; Also similarly, it uses constants with particular bits set to help track
+; whether values are corrupted, and can be easily evaluated by running through
+; -instcombine to see that the i64 round-trips.
+;
+; The difference from @test3 is that the constant is first stored to a separate
+; i64 alloca (%a2) and reaches the { i32, i24 } alloca through an 8-byte
+; memcpy, so the expected output splits it with lshr/trunc instead of using
+; literal i32 constants.
+;
+entry:
+  %a = alloca { i32, i24 }, align 4
+  %a2 = alloca i64, align 4
+; CHECK-NOT: alloca
+
+  store i64 34494054408, i64* %a2
+  %tmp0 = bitcast { i32, i24 }* %a to i8*
+  %tmp1 = bitcast i64* %a2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp0, i8* align 4 %tmp1, i64 8, i1 false)
+; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32
+; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32
+; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32
+
+; Read the struct back both as an i64 and as the i32 at offset 0.
+  %tmp2 = bitcast { i32, i24 }* %a to i64*
+  %tmp3 = load i64, i64* %tmp2, align 4
+  %tmp4 = bitcast { i32, i24 }* %a to i32*
+  %tmp5 = load i32, i32* %tmp4, align 4
+; CHECK: %[[HI_EXT:.*]] = zext i32 %[[HI_START]] to i64
+; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
+; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
+; CHECK: %[[LO_EXT:.*]] = zext i32 %[[LO_START]] to i64
+; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
+; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
+; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
+
+  call void @f(i64 %tmp3, i32 %tmp5)
+; CHECK: call void @f(i64 %[[LO_MERGE]], i32 %[[LO_START]])
+  ret void
+; CHECK: ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

Added: llvm/trunk/test/Transforms/SROA/dbg-addr-diamond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/dbg-addr-diamond.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/dbg-addr-diamond.ll (added)
+++ llvm/trunk/test/Transforms/SROA/dbg-addr-diamond.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,127 @@
+; RUN: opt -use-dbg-addr -sroa -S < %s | FileCheck %s
+
+; ModuleID = '<stdin>'
+source_filename = "newvars.c"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+%struct.Pair = type { i32, i32 }
+
+@pair = internal global %struct.Pair zeroinitializer ; destination of the final memcpy in @if_else
+
+; Function Attrs: nounwind uwtable
+define void @if_else(i32 %cond, i32 %a, i32 %b) !dbg !8 {
+entry:
+  %p = alloca %struct.Pair, align 4
+  %0 = bitcast %struct.Pair* %p to i8*, !dbg !25
+  call void @llvm.dbg.addr(metadata %struct.Pair* %p, metadata !20, metadata !DIExpression()), !dbg !26
+  %x = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 0, !dbg !27
+  store i32 %a, i32* %x, align 4, !dbg !28
+  %y = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 1, !dbg !34
+  store i32 %b, i32* %y, align 4, !dbg !35
+  %tobool = icmp ne i32 %cond, 0, !dbg !37
+  br i1 %tobool, label %if.then, label %if.else, !dbg !39
+
+if.then:                                          ; preds = %entry
+  %x1 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 0, !dbg !40
+  store i32 0, i32* %x1, align 4, !dbg !42
+  %y2 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 1, !dbg !43
+  store i32 %a, i32* %y2, align 4, !dbg !44
+  br label %if.end, !dbg !45
+
+if.else:                                          ; preds = %entry
+  %x3 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 0, !dbg !46
+  store i32 %b, i32* %x3, align 4, !dbg !48
+  %y4 = getelementptr inbounds %struct.Pair, %struct.Pair* %p, i32 0, i32 1, !dbg !49
+  store i32 0, i32* %y4, align 4, !dbg !50
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %1 = bitcast %struct.Pair* %p to i8*, !dbg !51
+  %2 = bitcast %struct.Pair* @pair to i8*, !dbg !51
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %2, i8* align 4 %1, i64 8, i1 false), !dbg !51
+  ret void
+}
+
+; CHECK-LABEL: define void @if_else(i32 %cond, i32 %a, i32 %b)
+; CHECK: entry:
+; CHECK:   call void @llvm.dbg.value(metadata i32 %a, metadata ![[PVAR:[0-9]+]], metadata ![[XFRAG:DIExpression\(DW_OP_LLVM_fragment, 0, 32\)]])
+; CHECK:   call void @llvm.dbg.value(metadata i32 %b, metadata ![[PVAR]], metadata ![[YFRAG:DIExpression\(DW_OP_LLVM_fragment, 32, 32\)]])
+; CHECK: if.then:
+; CHECK:   call void @llvm.dbg.value(metadata i32 0, metadata ![[PVAR]], metadata ![[XFRAG]])
+; CHECK:   call void @llvm.dbg.value(metadata i32 %a, metadata ![[PVAR]], metadata ![[YFRAG]])
+; CHECK: if.else:
+; CHECK:   call void @llvm.dbg.value(metadata i32 %b, metadata ![[PVAR]], metadata ![[XFRAG]])
+; CHECK:   call void @llvm.dbg.value(metadata i32 0, metadata ![[PVAR]], metadata ![[YFRAG]])
+; CHECK: if.end:
+; CHECK:   %p.sroa.4.0 = phi i32 [ %a, %if.then ], [ 0, %if.else ]
+; CHECK:   %p.sroa.0.0 = phi i32 [ 0, %if.then ], [ %b, %if.else ]
+; CHECK:   call void @llvm.dbg.value(metadata i32 %p.sroa.0.0, metadata ![[PVAR]], metadata ![[XFRAG]])
+; CHECK:   call void @llvm.dbg.value(metadata i32 %p.sroa.4.0, metadata ![[PVAR]], metadata ![[YFRAG]])
+
+; CHECK: ![[PVAR]] = !DILocalVariable(name: "p", {{.*}})
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #2
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.addr(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "newvars.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!"clang version 6.0.0 "}
+!8 = distinct !DISubprogram(name: "if_else", scope: !1, file: !1, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !16)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11, !14, !14, !14}
+!11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pair", file: !1, line: 1, size: 64, elements: !12)
+!12 = !{!13, !15}
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !11, file: !1, line: 1, baseType: !14, size: 32)
+!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !11, file: !1, line: 1, baseType: !14, size: 32, offset: 32)
+!16 = !{!17, !18, !19, !20}
+!17 = !DILocalVariable(name: "b", arg: 3, scope: !8, file: !1, line: 2, type: !14)
+!18 = !DILocalVariable(name: "a", arg: 2, scope: !8, file: !1, line: 2, type: !14)
+!19 = !DILocalVariable(name: "cond", arg: 1, scope: !8, file: !1, line: 2, type: !14)
+!20 = !DILocalVariable(name: "p", scope: !8, file: !1, line: 3, type: !11)
+!22 = !DILocation(line: 2, column: 42, scope: !8)
+!23 = !DILocation(line: 2, column: 35, scope: !8)
+!24 = !DILocation(line: 2, column: 25, scope: !8)
+!25 = !DILocation(line: 3, column: 3, scope: !8)
+!26 = !DILocation(line: 3, column: 15, scope: !8)
+!27 = !DILocation(line: 4, column: 5, scope: !8)
+!28 = !DILocation(line: 4, column: 7, scope: !8)
+!29 = !{!30, !31, i64 0}
+!30 = !{!"Pair", !31, i64 0, !31, i64 4}
+!31 = !{!"int", !32, i64 0}
+!32 = !{!"omnipotent char", !33, i64 0}
+!33 = !{!"Simple C/C++ TBAA"}
+!34 = !DILocation(line: 5, column: 5, scope: !8)
+!35 = !DILocation(line: 5, column: 7, scope: !8)
+!36 = !{!30, !31, i64 4}
+!37 = !DILocation(line: 6, column: 7, scope: !38)
+!38 = distinct !DILexicalBlock(scope: !8, file: !1, line: 6, column: 7)
+!39 = !DILocation(line: 6, column: 7, scope: !8)
+!40 = !DILocation(line: 7, column: 7, scope: !41)
+!41 = distinct !DILexicalBlock(scope: !38, file: !1, line: 6, column: 13)
+!42 = !DILocation(line: 7, column: 9, scope: !41)
+!43 = !DILocation(line: 8, column: 7, scope: !41)
+!44 = !DILocation(line: 8, column: 9, scope: !41)
+!45 = !DILocation(line: 9, column: 3, scope: !41)
+!46 = !DILocation(line: 10, column: 7, scope: !47)
+!47 = distinct !DILexicalBlock(scope: !38, file: !1, line: 9, column: 10)
+!48 = !DILocation(line: 10, column: 9, scope: !47)
+!49 = !DILocation(line: 11, column: 7, scope: !47)
+!50 = !DILocation(line: 11, column: 9, scope: !47)
+!51 = !DILocation(line: 13, column: 10, scope: !8)
+!52 = !{i64 0, i64 4, !53, i64 4, i64 4, !53}
+!53 = !{!31, !31, i64 0}
+!54 = !DILocation(line: 14, column: 1, scope: !8)

Added: llvm/trunk/test/Transforms/SROA/dbg-single-piece.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/dbg-single-piece.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/dbg-single-piece.ll (added)
+++ llvm/trunk/test/Transforms/SROA/dbg-single-piece.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt -sroa %s -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%foo = type { [8 x i8], [8 x i8] }
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+define void @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE() {
+entry:
+  %retval = alloca %foo, align 8
+  call void @llvm.dbg.declare(metadata %foo* %retval, metadata !1, metadata !7), !dbg !8
+; Checks that SROA still emits a fragment expression (DW_OP_LLVM_fragment), even if it produces only one piece
+; (as long as that piece is smaller than the whole variable; here it is bits 64-127 of the 128-bit variable)
+; CHECK-NOT: call void @llvm.dbg.value
+; CHECK: call void @llvm.dbg.value(metadata %foo* undef, {{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg
+; CHECK-NOT: call void @llvm.dbg.value
+  %0 = bitcast %foo* %retval to i8*
+  %1 = getelementptr inbounds i8, i8* %0, i64 8 ; byte offset 8: start of the second [8 x i8] field of %foo
+  %2 = bitcast i8* %1 to %foo**
+  store %foo* undef, %foo** %2, align 8 ; the only store to %retval; the first 8 bytes are never written
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!9}
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !DILocalVariable(name: "I", scope: !2, file: !3, line: 947, type: !4)
+!2 = distinct !DISubprogram(name: "findInsertLocation", linkageName: "_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE", scope: !3, file: !3, line: 937, isLocal: true, isDefinition: true, scopeLine: 938, flags: DIFlagPrototyped, isOptimized: true, unit: !9)
+!3 = !DIFile(filename: "none", directory: ".")
+!4 = !DICompositeType(tag: DW_TAG_class_type, name: "bundle_iterator<llvm::MachineInstr, llvm::ilist_iterator<llvm::MachineInstr> >", scope: !5, file: !3, line: 163, size: 128, align: 64, elements: !6, templateParams: !6, identifier: "_ZTSN4llvm17MachineBasicBlock15bundle_iteratorINS_12MachineInstrENS_14ilist_iteratorIS2_EEEE")
+!5 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "MachineBasicBlock", file: !3, line: 68, size: 1408, align: 64, identifier: "_ZTSN4llvm17MachineBasicBlockE")
+!6 = !{}
+!7 = !DIExpression()
+!8 = !DILocation(line: 947, column: 35, scope: !2)
+!9 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3)

Added: llvm/trunk/test/Transforms/SROA/dead-inst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/dead-inst.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/dead-inst.ll (added)
+++ llvm/trunk/test/Transforms/SROA/dead-inst.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,97 @@
+; SROA fails to rewrite allocs but does rewrite some phis and delete
+; dead instructions. Ensure that this invalidates analyses required
+; for other passes.
+; RUN: opt < %s -passes=bdce,sroa,bdce -o %t -debug-pass-manager 2>&1 | FileCheck %s
+; CHECK: Running pass: BDCEPass on H
+; CHECK: Running analysis: DemandedBitsAnalysis on H
+; CHECK: Running pass: SROA on H
+; CHECK: Invalidating all non-preserved analyses for: H
+; CHECK: Invalidating analysis: DemandedBitsAnalysis on H
+; CHECK: Running pass: BDCEPass on H
+; CHECK: Running analysis: DemandedBitsAnalysis on H
+; CHECK: Finished llvm::Function pass manager run.
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-grtev4-linux-gnu"
+
+%class.b = type { i64 }
+
+declare void @D(%class.b* sret, %class.b* dereferenceable(32)) local_unnamed_addr
+
+; Function Attrs: nounwind
+define hidden fastcc void @H(%class.b* noalias nocapture readnone, [2 x i64]) unnamed_addr {
+  %3 = alloca %class.b, align 8
+  %.sroa.0 = alloca i64, align 8
+  store i64 0, i64* %.sroa.0, align 8
+  %4 = extractvalue [2 x i64] %1, 1
+  switch i64 %4, label %6 [
+    i64 4, label %foo
+    i64 5, label %5
+  ]
+
+; <label>:5:
+  %.sroa.0.0..sroa_cast3 = bitcast i64* %.sroa.0 to i8**
+  br label %12
+
+; <label>:6:
+  %7 = icmp ugt i64 %4, 5
+  %.sroa.0.0..sroa_cast5 = bitcast i64* %.sroa.0 to i8**
+  br i1 %7, label %8, label %12
+
+; <label>:8:
+  %9 = load i8, i8* inttoptr (i64 4 to i8*), align 4
+  %10 = icmp eq i8 %9, 47
+  %11 = select i1 %10, i64 5, i64 4
+  br label %12
+
+; <label>:12:
+  %13 = phi i8** [ %.sroa.0.0..sroa_cast3, %5 ], [ %.sroa.0.0..sroa_cast5, %8 ], [ %.sroa.0.0..sroa_cast5, %6 ]
+  %14 = phi i64 [ 4, %5 ], [ %11, %8 ], [ 4, %6 ]
+  %15 = icmp ne i64 %4, 0
+  %16 = icmp ugt i64 %4, %14
+  %17 = and i1 %15, %16
+  br i1 %17, label %18, label %a.exit
+
+; <label>:18:
+  %19 = tail call i8* @memchr(i8* undef, i32 signext undef, i64 undef)
+  %20 = icmp eq i8* %19, null
+  %21 = sext i1 %20 to i64
+  br label %a.exit
+
+a.exit:
+  %22 = phi i64 [ -1, %12 ], [ %21, %18 ]
+  %23 = load i8*, i8** %13, align 8
+  %24 = sub nsw i64 %22, %14
+  %25 = bitcast %class.b* %3 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %25)
+  %26 = icmp ult i64 %24, 2
+  br i1 %26, label %G.exit, label %27
+
+; <label>:27:
+  %28 = getelementptr inbounds i8, i8* %23, i64 undef
+  %29 = icmp eq i8* %28, null
+  br i1 %29, label %30, label %31
+
+; <label>:30:
+  unreachable
+
+; <label>:31:
+  call void @D(%class.b* nonnull sret %3, %class.b* nonnull dereferenceable(32) undef)
+  br label %G.exit
+
+G.exit:
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %25)
+  br label %foo
+
+foo:
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare i8* @memchr(i8*, i32 signext, i64) local_unnamed_addr
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)

Added: llvm/trunk/test/Transforms/SROA/fca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/fca.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/fca.ll (added)
+++ llvm/trunk/test/Transforms/SROA/fca.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,48 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define { i32, i32 } @test0(i32 %x, i32 %y) {
+; CHECK-LABEL: @test0(
+; CHECK-NOT: alloca
+; CHECK: insertvalue { i32, i32 }
+; CHECK: insertvalue { i32, i32 }
+; CHECK: ret { i32, i32 }
+
+entry:
+  %a = alloca { i32, i32 }
+
+  store { i32, i32 } undef, { i32, i32 }* %a
+
+  %gep1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 0
+  store i32 %x, i32* %gep1
+  %gep2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 1
+  store i32 %y, i32* %gep2
+
+  %result = load { i32, i32 }, { i32, i32 }* %a
+  ret { i32, i32 } %result
+}
+
+define { i32, i32 } @test1(i32 %x, i32 %y) {
+; FIXME: This may be too conservative. Duncan argues that we are allowed to
+; split the volatile load and store here but must produce volatile scalar loads
+; and stores from them.
+; CHECK-LABEL: @test1(
+; CHECK: alloca
+; CHECK: alloca
+; CHECK: load volatile { i32, i32 }, { i32, i32 }*
+; CHECK: store volatile { i32, i32 }
+; CHECK: ret { i32, i32 }
+
+entry:
+  %a = alloca { i32, i32 }
+  %b = alloca { i32, i32 }
+
+  %gep1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 0
+  store i32 %x, i32* %gep1
+  %gep2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 1
+  store i32 %y, i32* %gep2
+
+  %result = load volatile { i32, i32 }, { i32, i32 }* %a
+  store volatile { i32, i32 } %result, { i32, i32 }* %b
+  ret { i32, i32 } %result
+}

Added: llvm/trunk/test/Transforms/SROA/mem-par-metadata-sroa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/mem-par-metadata-sroa.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/mem-par-metadata-sroa.ll (added)
+++ llvm/trunk/test/Transforms/SROA/mem-par-metadata-sroa.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,111 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure the llvm.access.group meta-data is preserved
+; when a load/store is replaced with another load/store by sroa
+;
+; class Complex {
+; private:
+;  float real_;
+;  float imaginary_;
+;
+; public:
+;   Complex() : real_(0), imaginary_(0) { }
+;   Complex(float real, float imaginary) : real_(real), imaginary_(imaginary) { }
+;   Complex(const Complex &rhs) : real_(rhs.real()), imaginary_(rhs.imaginary()) { }
+; 
+;   inline float real() const { return real_; }
+;   inline float imaginary() const { return imaginary_; }
+; 
+;   Complex operator+(const Complex& rhs) const
+;   {
+;     return Complex(real_ + rhs.real_, imaginary_ + rhs.imaginary_);
+;   }
+; };
+; 
+; void test(Complex *out, long size)
+; {
+;     #pragma clang loop vectorize(assume_safety)
+;     for (long offset = 0; offset < size; ++offset) {
+;       Complex t0 = out[offset];
+;       out[offset] = t0 + t0;
+;     }
+; }
+
+; CHECK: for.body:
+; CHECK-NOT:  store i32 %{{.*}}, i32* %{{.*}}, align 4
+; CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 4, !llvm.access.group !1
+; CHECK-NOT:  store i32 %{{.*}}, i32* %{{.*}}, align 4
+; CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 4, !llvm.access.group !1
+; CHECK-NOT:  store i32 %{{.*}}, i32* %{{.*}}, align 4
+; CHECK: br label
+
+; ModuleID = '<stdin>'
+source_filename = "mem-par-metadata-sroa1.cpp"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.Complex = type { float, float }
+
+; Function Attrs: norecurse nounwind uwtable
+define void @_Z4testP7Complexl(%class.Complex* nocapture %out, i64 %size) local_unnamed_addr #0 {
+entry:
+  %t0 = alloca %class.Complex, align 4
+  %ref.tmp = alloca i64, align 8
+  %tmpcast = bitcast i64* %ref.tmp to %class.Complex*
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %offset.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i64 %offset.0, %size
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.0
+  %real_.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 0
+  %real_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx, i64 0, i32 0
+  %0 = load float, float* %real_.i.i, align 4, !llvm.access.group !11
+  store float %0, float* %real_.i, align 4, !llvm.access.group !11
+  %imaginary_.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 1
+  %imaginary_.i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx, i64 0, i32 1
+  %1 = load float, float* %imaginary_.i.i, align 4, !llvm.access.group !11
+  store float %1, float* %imaginary_.i, align 4, !llvm.access.group !11
+  %arrayidx1 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %offset.0
+  %real_.i1 = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 0
+  %2 = load float, float* %real_.i1, align 4, !noalias !3, !llvm.access.group !11
+  %real_2.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 0
+  %3 = load float, float* %real_2.i, align 4, !noalias !3, !llvm.access.group !11
+  %add.i = fadd float %2, %3
+  %imaginary_.i2 = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 1
+  %4 = load float, float* %imaginary_.i2, align 4, !noalias !3, !llvm.access.group !11
+  %imaginary_3.i = getelementptr inbounds %class.Complex, %class.Complex* %t0, i64 0, i32 1
+  %5 = load float, float* %imaginary_3.i, align 4, !noalias !3, !llvm.access.group !11
+  %add4.i = fadd float %4, %5
+  %real_.i.i3 = getelementptr inbounds %class.Complex, %class.Complex* %tmpcast, i64 0, i32 0
+  store float %add.i, float* %real_.i.i3, align 4, !alias.scope !3, !llvm.access.group !11
+  %imaginary_.i.i4 = getelementptr inbounds %class.Complex, %class.Complex* %tmpcast, i64 0, i32 1
+  store float %add4.i, float* %imaginary_.i.i4, align 4, !alias.scope !3, !llvm.access.group !11
+  %6 = bitcast %class.Complex* %arrayidx1 to i64*
+  %7 = load i64, i64* %ref.tmp, align 8, !llvm.access.group !11
+  store i64 %7, i64* %6, align 4, !llvm.access.group !11
+  %inc = add nsw i64 %offset.0, 1
+  br label %for.cond, !llvm.loop !1
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 4.0.0 (cfe/trunk 277751)"}
+!1 = distinct !{!1, !2, !{!"llvm.loop.parallel_accesses", !11}}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
+!3 = !{!4}
+!4 = distinct !{!4, !5, !"_ZNK7ComplexplERKS_: %agg.result"}
+!5 = distinct !{!5, !"_ZNK7ComplexplERKS_"}
+!11 = distinct !{}

Added: llvm/trunk/test/Transforms/SROA/non-integral-pointers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/non-integral-pointers.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/non-integral-pointers.ll (added)
+++ llvm/trunk/test/Transforms/SROA/non-integral-pointers.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,88 @@
+; RUN: opt -sroa -S < %s | FileCheck %s
+
+; This test checks that SROA does not introduce ptrtoint and inttoptr
+; casts from and to non-integral pointers.  The "ni:4" bit in the
+; datalayout states that pointers of address space 4 are to be
+; considered "non-integral".
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f0(i1 %alwaysFalse, i64 %val) {
+; CHECK-LABEL: @f0(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+entry:
+  %loc = alloca i64
+  store i64 %val, i64* %loc
+  br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+neverTaken:
+  %loc.bc = bitcast i64* %loc to i8 addrspace(4)**
+  %ptr = load i8 addrspace(4)*, i8 addrspace(4)** %loc.bc
+  store i8 5, i8 addrspace(4)* %ptr
+  ret void
+
+alwaysTaken:
+  ret void
+}
+
+define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val) {
+; CHECK-LABEL: @f1(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+entry:
+  %loc = alloca i8 addrspace(4)*
+  store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
+  br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+neverTaken:
+  %loc.bc = bitcast i8 addrspace(4)** %loc to i64*
+  %int = load i64, i64* %loc.bc
+  ret i64 %int
+
+alwaysTaken:
+  ret i64 42
+}
+
+define i64 addrspace(4)* @memset(i1 %alwaysFalse) {
+; CHECK-LABEL: @memset(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+entry:
+  %x = alloca i64 addrspace(4)*
+  %cast.0 = bitcast i64 addrspace(4)** %x to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %cast.0, i8 5, i64 16, i1 false)
+  br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+neverTaken:
+  %x.field.ld.0 = load i64 addrspace(4)*, i64 addrspace(4)** %x
+  ret i64 addrspace(4)* %x.field.ld.0
+  
+alwaysTaken:
+  ret i64 addrspace(4)* null
+}
+
+;; TODO: This one demonstrates a missed opportunity.  The only known bit
+;; pattern for a non-integral pointer is that null is zero.  As such
+;; we could do SROA and replace the memset with a null store.  This will
+;; usually be cleaned up by instcombine.
+define i64 addrspace(4)* @memset_null(i1 %alwaysFalse) {
+; CHECK-LABEL: @memset_null(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+entry:
+  %x = alloca i64 addrspace(4)*
+  %cast.0 = bitcast i64 addrspace(4)** %x to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %cast.0, i8 0, i64 16, i1 false)
+  br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+neverTaken:
+  %x.field.ld.0 = load i64 addrspace(4)*, i64 addrspace(4)** %x
+  ret i64 addrspace(4)* %x.field.ld.0
+  
+alwaysTaken:
+  ret i64 addrspace(4)* null
+}
+
+declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)

Added: llvm/trunk/test/Transforms/SROA/phi-and-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/phi-and-select.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/phi-and-select.ll (added)
+++ llvm/trunk/test/Transforms/SROA/phi-and-select.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,646 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define i32 @test1() {
+; CHECK-LABEL: @test1(
+entry:
+	%a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+  %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0
+  %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+	store i32 0, i32* %a0
+	store i32 1, i32* %a1
+	%v0 = load i32, i32* %a0
+	%v1 = load i32, i32* %a1
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+	%cond = icmp sle i32 %v0, %v1
+	br i1 %cond, label %then, label %exit
+
+then:
+	br label %exit
+
+exit:
+	%phi = phi i32* [ %a1, %then ], [ %a0, %entry ]
+; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ]
+
+	%result = load i32, i32* %phi
+	ret i32 %result
+}
+
+define i32 @test2() {
+; CHECK-LABEL: @test2(
+entry:
+	%a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+  %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0
+  %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+	store i32 0, i32* %a0
+	store i32 1, i32* %a1
+	%v0 = load i32, i32* %a0
+	%v1 = load i32, i32* %a1
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+	%cond = icmp sle i32 %v0, %v1
+	%select = select i1 %cond, i32* %a1, i32* %a0
+; CHECK: select i1 %{{.*}}, i32 1, i32 0
+
+	%result = load i32, i32* %select
+	ret i32 %result
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: @test3(
+entry:
+	%a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+  ; Note that we build redundant GEPs here to ensure that having different GEPs
+  ; into the same alloca partition continues to work with PHI speculation. This
+  ; was the underlying cause of PR13926.
+  %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0
+  %a0b = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0 ; same address as %a0
+  %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+  %a1b = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 ; same address as %a1
+	store i32 0, i32* %a0
+	store i32 1, i32* %a1
+; CHECK-NOT: store
+
+  switch i32 %x, label %bb0 [ i32 1, label %bb1
+                              i32 2, label %bb2
+                              i32 3, label %bb3
+                              i32 4, label %bb4
+                              i32 5, label %bb5
+                              i32 6, label %bb6
+                              i32 7, label %bb7 ]
+
+bb0:
+	br label %exit
+bb1:
+	br label %exit
+bb2:
+	br label %exit
+bb3:
+	br label %exit
+bb4:
+	br label %exit
+bb5:
+	br label %exit
+bb6:
+	br label %exit
+bb7:
+	br label %exit
+
+exit:
+	%phi = phi i32* [ %a1, %bb0 ], [ %a0, %bb1 ], [ %a0, %bb2 ], [ %a1, %bb3 ],
+                  [ %a1b, %bb4 ], [ %a0b, %bb5 ], [ %a0b, %bb6 ], [ %a1b, %bb7 ]
+; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ], [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ]
+
+	%result = load i32, i32* %phi
+	ret i32 %result
+}
+
+define i32 @test4() {
+; CHECK-LABEL: @test4(
+entry:
+	%a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+  %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0
+  %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+	store i32 0, i32* %a0
+	store i32 1, i32* %a1
+	%v0 = load i32, i32* %a0
+	%v1 = load i32, i32* %a1
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+	%cond = icmp sle i32 %v0, %v1
+	%select = select i1 %cond, i32* %a0, i32* %a0
+; CHECK-NOT: select
+
+	%result = load i32, i32* %select
+	ret i32 %result
+; CHECK: ret i32 0
+}
+
+define i32 @test5(i32* %b) {
+; CHECK-LABEL: @test5(
+entry:
+	%a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+  %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+	store i32 1, i32* %a1
+; CHECK-NOT: store
+
+	%select = select i1 true, i32* %a1, i32* %b
+; CHECK-NOT: select
+
+	%result = load i32, i32* %select
+; CHECK-NOT: load
+
+	ret i32 %result
+; CHECK: ret i32 1
+}
+
+declare void @f(i32*, i32*)
+
+define i32 @test6(i32* %b) {
+; CHECK-LABEL: @test6(
+entry:
+	%a = alloca [2 x i32]
+  %c = alloca i32
+; CHECK-NOT: alloca
+
+  %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
+	store i32 1, i32* %a1
+
+	%select = select i1 true, i32* %a1, i32* %b
+	%select2 = select i1 false, i32* %a1, i32* %b
+  %select3 = select i1 false, i32* %c, i32* %b
+; CHECK: %[[select2:.*]] = select i1 false, i32* undef, i32* %b
+; CHECK: %[[select3:.*]] = select i1 false, i32* undef, i32* %b
+
+  ; Note, this would potentially escape the alloca pointer except for the
+  ; constant folding of the select.
+  call void @f(i32* %select2, i32* %select3)
+; CHECK: call void @f(i32* %[[select2]], i32* %[[select3]])
+
+
+	%result = load i32, i32* %select
+; CHECK-NOT: load
+
+  %dead = load i32, i32* %c
+
+	ret i32 %result
+; CHECK: ret i32 1
+}
+
+define i32 @test7() {
+; CHECK-LABEL: @test7(
+; CHECK-NOT: alloca
+
+entry:
+  %X = alloca i32
+  br i1 undef, label %good, label %bad
+
+good:
+  %Y1 = getelementptr i32, i32* %X, i64 0
+  store i32 0, i32* %Y1
+  br label %exit
+
+bad:
+  %Y2 = getelementptr i32, i32* %X, i64 1 ; index 1 is one i32 past the single-element alloca
+  store i32 0, i32* %Y2
+  br label %exit
+
+exit:
+	%P = phi i32* [ %Y1, %good ], [ %Y2, %bad ] ; the expectation below pins only the %good incoming value
+; CHECK: %[[phi:.*]] = phi i32 [ 0, %good ],
+  %Z2 = load i32, i32* %P
+  ret i32 %Z2
+; CHECK: ret i32 %[[phi]]
+}
+
+define i32 @test8(i32 %b, i32* %ptr) {
+; Ensure that we rewrite allocas to the used type when that use is hidden by
+; a PHI that can be speculated.
+; CHECK-LABEL: @test8(
+; CHECK-NOT: alloca
+; CHECK-NOT: load
+; CHECK: %[[value:.*]] = load i32, i32* %ptr
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = phi i32 [ undef, %else ], [ %[[value]], %then ]
+; CHECK-NEXT: ret i32 %[[result]]
+
+entry:
+  %f = alloca float
+  %test = icmp ne i32 %b, 0
+  br i1 %test, label %then, label %else
+
+then:
+  br label %exit
+
+else:
+  %bitcast = bitcast float* %f to i32*
+  br label %exit
+
+exit:
+  %phi = phi i32* [ %bitcast, %else ], [ %ptr, %then ]
+  %loaded = load i32, i32* %phi, align 4
+  ret i32 %loaded
+}
+
+define i32 @test9(i32 %b, i32* %ptr) {
+; Same as @test8 but for a select rather than a PHI node.
+; CHECK-LABEL: @test9(
+; CHECK-NOT: alloca
+; CHECK-NOT: load
+; CHECK: %[[value:.*]] = load i32, i32* %ptr
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = select i1 %{{.*}}, i32 undef, i32 %[[value]]
+; CHECK-NEXT: ret i32 %[[result]]
+
+entry:
+  %f = alloca float
+  store i32 0, i32* %ptr
+  %test = icmp ne i32 %b, 0
+  %bitcast = bitcast float* %f to i32*
+  %select = select i1 %test, i32* %bitcast, i32* %ptr
+  %loaded = load i32, i32* %select, align 4
+  ret i32 %loaded
+}
+
+define float @test10(i32 %b, float* %ptr) {
+; Don't try to promote allocas which are not eligible for it even after
+; rewriting due to the necessity of inserting bitcasts when speculating a PHI
+; node.
+; CHECK-LABEL: @test10(
+; CHECK: %[[alloca:.*]] = alloca
+; CHECK: %[[argvalue:.*]] = load float, float* %ptr
+; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
+; CHECK: %[[allocavalue:.*]] = load float, float* %[[cast]]
+; CHECK: %[[result:.*]] = phi float [ %[[allocavalue]], %else ], [ %[[argvalue]], %then ]
+; CHECK-NEXT: ret float %[[result]]
+
+entry:
+  %f = alloca double
+  store double 0.0, double* %f
+  %test = icmp ne i32 %b, 0
+  br i1 %test, label %then, label %else
+
+then:
+  br label %exit
+
+else:
+  %bitcast = bitcast double* %f to float* ; float view of the double alloca; a matching cast is expected in the output
+  br label %exit
+
+exit:
+  %phi = phi float* [ %bitcast, %else ], [ %ptr, %then ]
+  %loaded = load float, float* %phi, align 4
+  ret float %loaded
+}
+
+define float @test11(i32 %b, float* %ptr) {
+; Same as @test10 but for a select rather than a PHI node.
+; CHECK-LABEL: @test11(
+; CHECK: %[[alloca:.*]] = alloca
+; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
+; CHECK: %[[allocavalue:.*]] = load float, float* %[[cast]]
+; CHECK: %[[argvalue:.*]] = load float, float* %ptr
+; CHECK: %[[result:.*]] = select i1 %{{.*}}, float %[[allocavalue]], float %[[argvalue]]
+; CHECK-NEXT: ret float %[[result]]
+
+entry:
+  %f = alloca double
+  store double 0.0, double* %f
+  store float 0.0, float* %ptr
+  %test = icmp ne i32 %b, 0
+  %bitcast = bitcast double* %f to float*
+  %select = select i1 %test, float* %bitcast, float* %ptr
+  %loaded = load float, float* %select, align 4
+  ret float %loaded
+}
+
+define i32 @test12(i32 %x, i32* %p) {
+; Ensure we don't crash or fail to nuke dead selects of allocas if no load is
+; ever found.
+; CHECK-LABEL: @test12(
+; CHECK-NOT: alloca
+; CHECK-NOT: select
+; CHECK: ret i32 %x
+
+entry:
+  %a = alloca i32
+  store i32 %x, i32* %a
+  %dead = select i1 undef, i32* %a, i32* %p
+  %load = load i32, i32* %a
+  ret i32 %load
+}
+
+define i32 @test13(i32 %x, i32* %p) {
+; Ensure we don't crash or fail to nuke dead phis of allocas if no load is ever
+; found.
+; CHECK-LABEL: @test13(
+; CHECK-NOT: alloca
+; CHECK-NOT: phi
+; CHECK: ret i32 %x
+
+entry:
+  %a = alloca i32
+  store i32 %x, i32* %a
+  br label %loop
+
+loop:
+  %phi = phi i32* [ %p, %entry ], [ %a, %loop ]
+  br i1 undef, label %loop, label %exit
+
+exit:
+  %load = load i32, i32* %a
+  ret i32 %load
+}
+
+define i32 @test14(i1 %b1, i1 %b2, i32* %ptr) {
+; Check for problems when there are both selects and phis and one is
+; speculatable toward promotion but the other is not. That should block all of
+; the speculation.
+; CHECK-LABEL: @test14(
+; CHECK: alloca
+; CHECK: alloca
+; CHECK: select
+; CHECK: phi
+; CHECK: phi
+; CHECK: select
+; CHECK: ret i32
+
+entry:
+  %f = alloca i32
+  %g = alloca i32
+  store i32 0, i32* %f
+  store i32 0, i32* %g
+  %f.select = select i1 %b1, i32* %f, i32* %ptr
+  br i1 %b2, label %then, label %else
+
+then:
+  br label %exit
+
+else:
+  br label %exit
+
+exit:
+  %f.phi = phi i32* [ %f, %then ], [ %f.select, %else ]
+  %g.phi = phi i32* [ %g, %then ], [ %ptr, %else ]
+  %f.loaded = load i32, i32* %f.phi
+  %g.select = select i1 %b1, i32* %g, i32* %g.phi
+  %g.loaded = load i32, i32* %g.select
+  %result = add i32 %f.loaded, %g.loaded
+  ret i32 %result
+}
+
+define i32 @PR13905() {
+; Check a pattern where we have a chain of dead phi nodes to ensure they are
+; deleted and promotion can proceed.
+; CHECK-LABEL: @PR13905(
+; CHECK-NOT: alloca i32
+; CHECK: ret i32 undef
+
+entry:
+  %h = alloca i32
+  store i32 0, i32* %h
+  br i1 undef, label %loop1, label %exit
+
+loop1:
+  %phi1 = phi i32* [ null, %entry ], [ %h, %loop1 ], [ %h, %loop2 ]
+  br i1 undef, label %loop1, label %loop2
+
+loop2:
+  br i1 undef, label %loop1, label %exit
+
+exit:
+  %phi2 = phi i32* [ %phi1, %loop2 ], [ null, %entry ]
+  ret i32 undef
+}
+
+define i32 @PR13906() {
+; Another pattern which can lead to crashes due to failing to clear out dead
+; PHI nodes or select nodes. This triggers subtly differently from the above
+; cases because the PHI node is (recursively) alive, but the select is dead.
+; CHECK-LABEL: @PR13906(
+; CHECK-NOT: alloca
+
+entry:
+  %c = alloca i32
+  store i32 0, i32* %c
+  br label %for.cond
+
+for.cond:
+  %d.0 = phi i32* [ undef, %entry ], [ %c, %if.then ], [ %d.0, %for.cond ]
+  br i1 undef, label %if.then, label %for.cond
+
+if.then:
+  %tmpcast.d.0 = select i1 undef, i32* %c, i32* %d.0
+  br label %for.cond
+}
+
+define i64 @PR14132(i1 %flag) {
+; CHECK-LABEL: @PR14132(
+; Here we form a PHI-node by promoting the pointer alloca first, and then in
+; order to promote the other two allocas, we speculate the load of the
+; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
+; alloca. While this is a bit dubious, we were asserting on trying to
+; rewrite it. The trick is that the code using the value may carefully take
+; steps to only use the not-undef bits, and so we need to at least loosely
+; support this.
+entry:
+  %a = alloca i64, align 8
+  %b = alloca i8, align 8
+  %ptr = alloca i64*, align 8
+; CHECK-NOT: alloca
+
+  %ptr.cast = bitcast i64** %ptr to i8**
+  store i64 0, i64* %a, align 8
+  store i8 1, i8* %b, align 8
+  store i64* %a, i64** %ptr, align 8
+  br i1 %flag, label %if.then, label %if.end
+
+if.then:
+  store i8* %b, i8** %ptr.cast, align 8
+  br label %if.end
+; CHECK-NOT: store
+; CHECK: %[[ext:.*]] = zext i8 1 to i64
+
+if.end:
+  %tmp = load i64*, i64** %ptr, align 8
+  %result = load i64, i64* %tmp, align 8
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = phi i64 [ %[[ext]], %if.then ], [ 0, %entry ]
+
+  ret i64 %result
+; CHECK-NEXT: ret i64 %[[result]]
+}
+
+define float @PR16687(i64 %x, i1 %flag) {
+; CHECK-LABEL: @PR16687(
+; Check that even when we try to speculate the same phi twice (in two slices)
+; on an otherwise promotable construct, we don't get ahead of ourselves and try
+; to promote one of the slices prior to speculating it.
+
+entry:
+  %a = alloca i64, align 8
+  store i64 %x, i64* %a
+  br i1 %flag, label %then, label %else
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK: %[[lo:.*]] = trunc i64 %x to i32
+; CHECK: %[[shift:.*]] = lshr i64 %x, 32
+; CHECK: %[[hi:.*]] = trunc i64 %[[shift]] to i32
+
+then:
+  %a.f = bitcast i64* %a to float*
+  br label %end
+; CHECK: %[[lo_cast:.*]] = bitcast i32 %[[lo]] to float
+
+else:
+  %a.raw = bitcast i64* %a to i8*
+  %a.raw.4 = getelementptr i8, i8* %a.raw, i64 4
+  %a.raw.4.f = bitcast i8* %a.raw.4 to float*
+  br label %end
+; CHECK: %[[hi_cast:.*]] = bitcast i32 %[[hi]] to float
+
+end:
+  %a.phi.f = phi float* [ %a.f, %then ], [ %a.raw.4.f, %else ]
+  %f = load float, float* %a.phi.f
+  ret float %f
+; CHECK: %[[phi:.*]] = phi float [ %[[lo_cast]], %then ], [ %[[hi_cast]], %else ]
+; CHECK-NOT: load
+; CHECK: ret float %[[phi]]
+}
+
+; Verifies we fixed PR20425. We should be able to promote all allocas to
+; registers in this test.
+;
+; %0 = slice
+; %1 = slice
+; %2 = phi(%0, %1) // == slice
+define float @simplify_phi_nodes_that_equal_slice(i1 %cond, float* %temp) {
+; CHECK-LABEL: @simplify_phi_nodes_that_equal_slice(
+entry:
+  %arr = alloca [4 x float], align 4
+; CHECK-NOT: alloca
+  br i1 %cond, label %then, label %else
+
+then:
+  %0 = getelementptr inbounds [4 x float], [4 x float]* %arr, i64 0, i64 3
+  store float 1.000000e+00, float* %0, align 4
+  br label %merge
+
+else:
+  %1 = getelementptr inbounds [4 x float], [4 x float]* %arr, i64 0, i64 3
+  store float 2.000000e+00, float* %1, align 4
+  br label %merge
+
+merge:
+  %2 = phi float* [ %0, %then ], [ %1, %else ]
+  store float 0.000000e+00, float* %temp, align 4
+  %3 = load float, float* %2, align 4
+  ret float %3
+}
+
+; A slightly complicated example for PR20425.
+;
+; %0 = slice
+; %1 = phi(%0) // == slice
+; %2 = slice
+; %3 = phi(%1, %2) // == slice
+define float @simplify_phi_nodes_that_equal_slice_2(i1 %cond, float* %temp) {
+; CHECK-LABEL: @simplify_phi_nodes_that_equal_slice_2(
+entry:
+  %arr = alloca [4 x float], align 4
+; CHECK-NOT: alloca
+  br i1 %cond, label %then, label %else
+
+then:
+  %0 = getelementptr inbounds [4 x float], [4 x float]* %arr, i64 0, i64 3
+  store float 1.000000e+00, float* %0, align 4
+  br label %then2
+
+then2:
+  %1 = phi float* [ %0, %then ]
+  store float 2.000000e+00, float* %1, align 4
+  br label %merge
+
+else:
+  %2 = getelementptr inbounds [4 x float], [4 x float]* %arr, i64 0, i64 3
+  store float 3.000000e+00, float* %2, align 4
+  br label %merge
+
+merge:
+  %3 = phi float* [ %1, %then2 ], [ %2, %else ]
+  store float 0.000000e+00, float* %temp, align 4
+  %4 = load float, float* %3, align 4
+  ret float %4
+}
+
+%struct.S = type { i32 }
+
+; Verifies we fixed PR20822. We have a foldable PHI feeding a speculatable PHI
+; which requires the rewriting of the speculated PHI to handle insertion
+; when the incoming pointer is itself from a PHI node. We would previously
+; insert a bitcast instruction *before* a PHI, producing an invalid module;
+; make sure we insert *after* the first non-PHI instruction.
+define void @PR20822() {
+; CHECK-LABEL: @PR20822(
+entry:
+  %f = alloca %struct.S, align 4
+; CHECK: %[[alloca:.*]] = alloca
+  br i1 undef, label %if.end, label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  br label %if.end
+
+if.end:                                           ; preds = %for.cond, %entry
+  %f2 = phi %struct.S* [ %f, %entry ], [ %f, %for.cond ]
+; CHECK: phi i32
+; CHECK: %[[cast:.*]] = bitcast i32* %[[alloca]] to %struct.S*
+  phi i32 [ undef, %entry ], [ undef, %for.cond ]
+  br i1 undef, label %if.then5, label %if.then2
+
+if.then2:                                         ; preds = %if.end
+  br label %if.then5
+
+if.then5:                                         ; preds = %if.then2, %if.end
+  %f1 = phi %struct.S* [ undef, %if.then2 ], [ %f2, %if.end ]
+; CHECK: phi {{.*}} %[[cast]]
+  store %struct.S undef, %struct.S* %f1, align 4
+  ret void
+}
+
+define i32 @phi_align(i32* %z) {
+; CHECK-LABEL: @phi_align(
+entry:
+  %a = alloca [8 x i8], align 8
+; CHECK: alloca [7 x i8]
+
+  %a0x = getelementptr [8 x i8], [8 x i8]* %a, i64 0, i32 1
+  %a0 = bitcast i8* %a0x to i32*
+  %a1x = getelementptr [8 x i8], [8 x i8]* %a, i64 0, i32 4
+  %a1 = bitcast i8* %a1x to i32*
+; CHECK: store i32 0, {{.*}}, align 1
+  store i32 0, i32* %a0, align 1
+; CHECK: store i32 1, {{.*}}, align 1
+  store i32 1, i32* %a1, align 4
+; CHECK: load {{.*}}, align 1
+  %v0 = load i32, i32* %a0, align 1
+; CHECK: load {{.*}}, align 1
+  %v1 = load i32, i32* %a1, align 4
+  %cond = icmp sle i32 %v0, %v1
+  br i1 %cond, label %then, label %exit
+
+then:
+  br label %exit
+
+exit:
+; CHECK: %phi = phi i32* [ {{.*}}, %then ], [ %z, %entry ]
+; CHECK-NEXT: %result = load i32, i32* %phi, align 1
+  %phi = phi i32* [ %a1, %then ], [ %z, %entry ]
+  %result = load i32, i32* %phi, align 4
+  ret i32 %result
+}
+
+; Don't speculate a load based on an earlier volatile operation.
+define i8 @volatile_select(i8* %p, i1 %b) {
+; CHECK-LABEL: @volatile_select(
+; CHECK: select i1 %b, i8* %p, i8* %p2
+  %p2 = alloca i8
+  store i8 0, i8* %p2
+  store volatile i8 0, i8* %p
+  %px = select i1 %b, i8* %p, i8* %p2
+  %v2 = load i8, i8* %px
+  ret i8 %v2
+}

Added: llvm/trunk/test/Transforms/SROA/phi-with-duplicate-pred.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/phi-with-duplicate-pred.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/phi-with-duplicate-pred.ll (added)
+++ llvm/trunk/test/Transforms/SROA/phi-with-duplicate-pred.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+@a = external global i16, align 1
+
+define void @f2() {
+; CHECK-LABEL: @f2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    [[G_0_SROA_SPECULATE_LOAD_CLEANUP:%.*]] = load i16, i16* @a, align 1
+; CHECK-NEXT:    switch i32 2, label [[CLEANUP7:%.*]] [
+; CHECK-NEXT:    i32 0, label [[LBL1:%.*]]
+; CHECK-NEXT:    i32 2, label [[LBL1]]
+; CHECK-NEXT:    ]
+; CHECK:       if.else:
+; CHECK-NEXT:    br label [[LBL1]]
+; CHECK:       lbl1:
+; CHECK-NEXT:    [[G_0_SROA_SPECULATED:%.*]] = phi i16 [ [[G_0_SROA_SPECULATE_LOAD_CLEANUP]], [[CLEANUP]] ], [ [[G_0_SROA_SPECULATE_LOAD_CLEANUP]], [[CLEANUP]] ], [ undef, [[IF_ELSE]] ]
+; CHECK-NEXT:    unreachable
+; CHECK:       cleanup7:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %e = alloca i16, align 1
+  br i1 undef, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  br label %cleanup
+
+cleanup:                                          ; preds = %if.then
+  switch i32 2, label %cleanup7 [
+  i32 0, label %lbl1
+  i32 2, label %lbl1
+  ]
+
+if.else:                                          ; preds = %entry
+  br label %lbl1
+
+lbl1:                                             ; preds = %if.else, %cleanup, %cleanup
+  %g.0 = phi i16* [ @a, %cleanup ], [ @a, %cleanup ], [ %e, %if.else ]
+  %0 = load i16, i16* %g.0, align 1
+  unreachable
+
+cleanup7:                                         ; preds = %cleanup
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/SROA/pointer-offset-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/pointer-offset-size.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/pointer-offset-size.ll (added)
+++ llvm/trunk/test/Transforms/SROA/pointer-offset-size.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64:32"
+
+%struct.test = type { %struct.basic, %struct.basic }
+%struct.basic = type { i16, i8 }
+
+define i16 @test(%struct.test* %ts2.i) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S_SROA_0:%.*]] = alloca [3 x i8], align 2
+; CHECK-NEXT:    [[S_SROA_0_0__SROA_CAST:%.*]] = bitcast %struct.test* [[TS2_I:%.*]] to i8*
+; CHECK-NEXT:    [[S_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [3 x i8], [3 x i8]* [[S_SROA_0]], i32 0, i32 0
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[S_SROA_0_0__SROA_CAST]], i8* align 2 [[S_SROA_0_0__SROA_IDX]], i32 3, i1 false)
+; CHECK-NEXT:    [[X1_I_I:%.*]] = getelementptr inbounds [[STRUCT_TEST:%.*]], %struct.test* [[TS2_I]], i32 0, i32 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[X1_I_I]]
+; CHECK-NEXT:    ret i16 [[TMP0]]
+;
+entry:
+  %s = alloca %struct.test
+  %0 = bitcast %struct.test* %ts2.i to i8*
+  %1 = bitcast %struct.test* %s to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 3, i1 false)
+  %x1.i.i = getelementptr inbounds %struct.test, %struct.test* %ts2.i, i32 0, i32 0, i32 0
+  %2 = load i16, i16* %x1.i.i
+  ret i16 %2
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1)

Added: llvm/trunk/test/Transforms/SROA/ppcf128-no-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/ppcf128-no-fold.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/ppcf128-no-fold.ll (added)
+++ llvm/trunk/test/Transforms/SROA/ppcf128-no-fold.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -sroa -S | FileCheck %s 
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.ld2 = type { [2 x ppc_fp128] }
+declare void @bar(i8*, [2 x i128])
+
+define void @foo(i8* %v) #0 {
+entry:
+  %v.addr = alloca i8*, align 8
+  %z = alloca %struct.ld2, align 16
+  store i8* %v, i8** %v.addr, align 8
+  %dat = getelementptr inbounds %struct.ld2, %struct.ld2* %z, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [2 x ppc_fp128], [2 x ppc_fp128]* %dat, i32 0, i64 0
+  store ppc_fp128 0xM403B0000000000000000000000000000, ppc_fp128* %arrayidx, align 16
+  %dat1 = getelementptr inbounds %struct.ld2, %struct.ld2* %z, i32 0, i32 0
+  %arrayidx2 = getelementptr inbounds [2 x ppc_fp128], [2 x ppc_fp128]* %dat1, i32 0, i64 1
+  store ppc_fp128 0xM4093B400000000000000000000000000, ppc_fp128* %arrayidx2, align 16
+  %0 = load i8*, i8** %v.addr, align 8
+  %coerce.dive = getelementptr %struct.ld2, %struct.ld2* %z, i32 0, i32 0
+  %1 = bitcast [2 x ppc_fp128]* %coerce.dive to [2 x i128]*
+  %2 = load [2 x i128], [2 x i128]* %1, align 1
+  call void @bar(i8* %0, [2 x i128] %2)
+  ret void
+}
+
+; CHECK-LABEL: @foo
+; CHECK-NOT: i128 4628293042053316608
+; CHECK-NOT: i128 4653260752096854016
+; CHECK-DAG: i128 bitcast (ppc_fp128 0xM403B0000000000000000000000000000 to i128)
+; CHECK-DAG: i128 bitcast (ppc_fp128 0xM4093B400000000000000000000000000 to i128)
+; CHECK: call void @bar(i8* %v, [2 x i128]
+; CHECK: ret void
+
+attributes #0 = { nounwind }
+

Added: llvm/trunk/test/Transforms/SROA/pr26972.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/pr26972.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/pr26972.ll (added)
+++ llvm/trunk/test/Transforms/SROA/pr26972.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,17 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux"
+
+; Make sure we properly handle allocas where the allocated
+; size overflows a uint32_t. This specific constant results in
+; the size in bits being 32 after truncation to a 32-bit int.
+; CHECK-LABEL: fn1
+; CHECK-NEXT: ret void
+define void @fn1() {
+  %a = alloca [1073741825 x i32], align 16
+  %t0 = bitcast [1073741825 x i32]* %a to i8*
+  call void @llvm.lifetime.end.p0i8(i64 4294967300, i8* %t0)
+  ret void
+}
+
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)

Added: llvm/trunk/test/Transforms/SROA/pr37267.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/pr37267.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/pr37267.ll (added)
+++ llvm/trunk/test/Transforms/SROA/pr37267.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,74 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32:64-S128"
+target triple = "sparcv9-sun-solaris"
+
+; PR37267
+; Check that we don't crash on this test.
+
+define i16 @f1() {
+; CHECK-LABEL: @f1
+; CHECK: %[[retval:.*]] = add i16 2, 2
+; CHECK: ret i16 %[[retval]]
+
+bb1:
+; This 12-byte alloca is split into partitions as [0,2), [2,4), [4,8), [8,10), [10, 12).
+; The reported error happened when rewriteIntegerStore tries to widen a split tail of slice 1 for [4, 8) partition.
+; alloca  012345678901
+; slice 1:  WWWW
+; slice 2:        WWWW
+; slice 3:        RR
+; slice 4:  RR
+
+  %a.3 = alloca [6 x i16], align 1
+; slice 1: [2,6)
+  %_tmp3 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 1
+  %_tmp5 = bitcast i16* %_tmp3 to i32*
+  store i32 131074, i32* %_tmp5, align 1
+; slice 2: [8,12)
+  %_tmp8 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 4
+  %_tmp10 = bitcast i16* %_tmp8 to i32*
+  store i32 131074, i32* %_tmp10, align 1
+; slice 3: [8,10)
+  %_tmp12 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 4
+  %_tmp13 = load i16, i16* %_tmp12, align 1
+; slice 4: [2,4)
+  %_tmp15 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 1
+  %_tmp16 = load i16, i16* %_tmp15, align 1
+
+  %rc = add i16 %_tmp13, %_tmp16
+  ret i16 %rc
+}
+
+define i16 @f2() {
+; CHECK-LABEL: @f2
+; CHECK: %[[retval:.*]] = add i16 2, undef
+; CHECK: ret i16 %[[retval]]
+
+bb1:
+; This 12-byte alloca is split into partitions as [0,2), [2,4), [4,8), [8,10), [10, 12).
+; The reported error happened when visitLoadInst rewrites a split tail of slice 1 for [4, 8) partition.
+; alloca  012345678901
+; slice 1:  RRRR
+; slice 2:        WWWW
+; slice 3:        RR
+; slice 4:  RR
+
+  %a.3 = alloca [6 x i16], align 1
+; slice 1: [2,6)
+  %_tmp3 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 1
+  %_tmp5 = bitcast i16* %_tmp3 to i32*
+  %_tmp6 = load i32, i32* %_tmp5, align 1
+; slice 2: [8,12)
+  %_tmp8 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 4
+  %_tmp10 = bitcast i16* %_tmp8 to i32*
+  store i32 131074, i32* %_tmp10, align 1
+; slice 3: [8,10)
+  %_tmp12 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 4
+  %_tmp13 = load i16, i16* %_tmp12, align 1
+; slice 4: [2,4)
+  %_tmp15 = getelementptr inbounds [6 x i16], [6 x i16]* %a.3, i16 0, i16 1
+  %_tmp16 = load i16, i16* %_tmp15, align 1
+
+  %rc = add i16 %_tmp13, %_tmp16
+  ret i16 %rc
+}

Added: llvm/trunk/test/Transforms/SROA/preserve-nonnull.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/preserve-nonnull.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/preserve-nonnull.ll (added)
+++ llvm/trunk/test/Transforms/SROA/preserve-nonnull.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,92 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure that SROA doesn't lose nonnull metadata
+; on loads from allocas that get optimized out.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+
+; Check that we do basic propagation of nonnull when rewriting.
+define i8* @propagate_nonnull(i32* %v) {
+; CHECK-LABEL: define i8* @propagate_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i8*
+; CHECK-NEXT:    %[[V_CAST:.*]] = bitcast i32* %v to i8*
+; CHECK-NEXT:    store i8* %[[V_CAST]], i8** %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
+; CHECK-NEXT:    ret i8* %[[LOAD]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i32**
+  %a.gep1.cast = bitcast i8** %a.gep1 to i32**
+  store i32* %v, i32** %a.gep1.cast
+  store i32* null, i32** %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+define float* @turn_nonnull_into_assume(float** %arg) {
+; CHECK-LABEL: define float* @turn_nonnull_into_assume(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
+; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
+; CHECK-NEXT:    call void @llvm.assume(i1 %[[ASSUME]])
+; CHECK-NEXT:    ret float* %[[RETURN]]
+entry:
+  %buf = alloca float*
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %_buf_i8, i8* align 8 %_arg_i8, i64 8, i1 false)
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
+}
+
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load.
+; FIXME: While this doesn't do anything actively harmful today, it really
+; should propagate the !nonnull metadata to range metadata. The irony is, it
+; *does* initially, but then we lose that !range metadata before we finish
+; SROA.
+define i8* @propagate_nonnull_to_int() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i64
+; CHECK-NEXT:    store i64 42, i64* %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
+; CHECK-NEXT:    %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
+; CHECK-NEXT:    ret i8* %[[CAST]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load and immediately promote it to an SSA
+; register. This can fail in interesting ways due to the rewrite iteration of
+; SROA, resulting in PR32902.
+define i8* @propagate_nonnull_to_int_and_promote() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
+; CHECK-NEXT:    ret i8* %[[PROMOTED_VALUE]]
+entry:
+  %a = alloca [2 x i8*], align 8
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load i8*, i8** %a.gep1, align 8, !nonnull !0
+  ret i8* %load
+}
+
+!0 = !{}

Added: llvm/trunk/test/Transforms/SROA/slice-order-independence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/slice-order-independence.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/slice-order-independence.ll (added)
+++ llvm/trunk/test/Transforms/SROA/slice-order-independence.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+
+; Check that the chosen type for a split is independent from the order of
+; slices even in case of types that are skipped because their width is not a
+; byte width multiple
+define void @skipped_inttype_first({ i16*, i32 }*) {
+; CHECK-LABEL: @skipped_inttype_first
+; CHECK: alloca i8*
+  %arg = alloca { i16*, i32 }, align 8
+  %2 = bitcast { i16*, i32 }* %0 to i8*
+  %3 = bitcast { i16*, i32 }* %arg to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %3, i8* align 8 %2, i32 16, i1 false)
+  %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
+  %pb0 = bitcast i16** %b to i63*
+  %b0 = load i63, i63* %pb0
+  %pb1 = bitcast i16** %b to i8**
+  %b1 = load i8*, i8** %pb1
+  ret void
+}
+
+define void @skipped_inttype_last({ i16*, i32 }*) {
+; CHECK-LABEL: @skipped_inttype_last
+; CHECK: alloca i8*
+  %arg = alloca { i16*, i32 }, align 8
+  %2 = bitcast { i16*, i32 }* %0 to i8*
+  %3 = bitcast { i16*, i32 }* %arg to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %3, i8* align 8 %2, i32 16, i1 false)
+  %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
+  %pb1 = bitcast i16** %b to i8**
+  %b1 = load i8*, i8** %pb1
+  %pb0 = bitcast i16** %b to i63*
+  %b0 = load i63, i63* %pb0
+  ret void
+}

Added: llvm/trunk/test/Transforms/SROA/slice-width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/slice-width.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/slice-width.ll (added)
+++ llvm/trunk/test/Transforms/SROA/slice-width.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,106 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+
+define void @no_split_on_non_byte_width(i32) {
+; This tests that allocas are not split into slices that are not a byte-width multiple.
+  %arg = alloca i32 , align 8
+  store i32 %0, i32* %arg
+  br label %load_i32
+
+load_i32:
+; CHECK-LABEL: load_i32:
+; CHECK-NOT: bitcast {{.*}} to i1
+; CHECK-NOT: zext i1
+  %r0 = load i32, i32* %arg
+  br label %load_i1
+
+load_i1:
+; CHECK-LABEL: load_i1:
+; CHECK: bitcast {{.*}} to i1
+  %p1 = bitcast i32* %arg to i1*
+  %t1 = load i1, i1* %p1
+  ret void
+}
+
+; PR18726: Check that we use memcpy and memset to fill out padding when we have
+; a slice with a simple single type whose store size is smaller than the slice
+; size.
+
+%union.Foo = type { x86_fp80, i64, i64 }
+
+@foo_copy_source = external constant %union.Foo
+@i64_sink = global i64 0
+
+define void @memcpy_fp80_padding() {
+  %x = alloca %union.Foo
+
+  ; Copy from a global.
+  %x_i8 = bitcast %union.Foo* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %x_i8, i8* align 16 bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i1 false)
+
+  ; Access a slice of the alloca to trigger SROA.
+  %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
+  %elt = load i64, i64* %mid_p
+  store i64 %elt, i64* @i64_sink
+  ret void
+}
+; CHECK-LABEL: define void @memcpy_fp80_padding
+; CHECK: alloca x86_fp80
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK: load i64, i64* getelementptr inbounds (%union.Foo, %union.Foo* @foo_copy_source, i64 0, i32 1)
+; CHECK: load i64, i64* getelementptr inbounds (%union.Foo, %union.Foo* @foo_copy_source, i64 0, i32 2)
+
+define void @memset_fp80_padding() {
+  %x = alloca %union.Foo
+
+  ; Set to all ones.
+  %x_i8 = bitcast %union.Foo* %x to i8*
+  call void @llvm.memset.p0i8.i32(i8* align 16 %x_i8, i8 -1, i32 32, i1 false)
+
+  ; Access a slice of the alloca to trigger SROA.
+  %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
+  %elt = load i64, i64* %mid_p
+  store i64 %elt, i64* @i64_sink
+  ret void
+}
+; CHECK-LABEL: define void @memset_fp80_padding
+; CHECK: alloca x86_fp80
+; CHECK: call void @llvm.memset.p0i8.i32(i8* align 16 %{{.*}}, i8 -1, i32 16, i1 false)
+; CHECK: store i64 -1, i64* @i64_sink
+
+%S.vec3float = type { float, float, float }
+%U.vec3float = type { <4 x float> }
+
+declare i32 @memcpy_vec3float_helper(%S.vec3float*)
+
+define i32 @memcpy_vec3float_widening(%S.vec3float* %x) {
+; CHECK-LABEL: @memcpy_vec3float_widening(
+; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
+; vector store, hence accidentally putting gibberish onto the stack.
+entry:
+  ; Create a temporary variable %tmp1 and copy %x[0] into it
+  %tmp1 = alloca %S.vec3float, align 4
+  %0 = bitcast %S.vec3float* %tmp1 to i8*
+  %1 = bitcast %S.vec3float* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 12, i1 false)
+
+  ; The following block does nothing; but appears to confuse SROA
+  %unused1 = bitcast %S.vec3float* %tmp1 to %U.vec3float*
+  %unused2 = getelementptr inbounds %U.vec3float, %U.vec3float* %unused1, i32 0, i32 0
+  %unused3 = load <4 x float>, <4 x float>* %unused2, align 1
+
+  ; Create a second temporary and copy %tmp1 into it
+  %tmp2 = alloca %S.vec3float, align 4
+  %2 = bitcast %S.vec3float* %tmp2 to i8*
+  %3 = bitcast %S.vec3float* %tmp1 to i8*
+; CHECK: alloca
+; CHECK-NOT: store <4 x float>
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %2, i8* align 4 %3, i32 12, i1 false)
+
+  %result = call i32 @memcpy_vec3float_helper(%S.vec3float* %tmp2)
+  ret i32 %result
+; CHECK: ret i32 %result
+}

Added: llvm/trunk/test/Transforms/SROA/vector-conversion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/vector-conversion.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/vector-conversion.ll (added)
+++ llvm/trunk/test/Transforms/SROA/vector-conversion.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define <4 x i64> @vector_ptrtoint({<2 x i32*>, <2 x i32*>} %x) {
+; CHECK-LABEL: @vector_ptrtoint
+  %a = alloca {<2 x i32*>, <2 x i32*>}
+; CHECK-NOT: alloca
+
+  store {<2 x i32*>, <2 x i32*>} %x, {<2 x i32*>, <2 x i32*>}* %a
+; CHECK-NOT: store
+
+  %cast = bitcast {<2 x i32*>, <2 x i32*>}* %a to <4 x i64>*
+  %vec = load <4 x i64>, <4 x i64>* %cast
+; CHECK-NOT: load
+; CHECK: ptrtoint
+
+  ret <4 x i64> %vec
+}
+
+define <4 x i32*> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) {
+; CHECK-LABEL: @vector_inttoptr
+  %a = alloca {<2 x i64>, <2 x i64>}
+; CHECK-NOT: alloca
+
+  store {<2 x i64>, <2 x i64>} %x, {<2 x i64>, <2 x i64>}* %a
+; CHECK-NOT: store
+
+  %cast = bitcast {<2 x i64>, <2 x i64>}* %a to <4 x i32*>*
+  %vec = load <4 x i32*>, <4 x i32*>* %cast
+; CHECK-NOT: load
+; CHECK: inttoptr
+
+  ret <4 x i32*> %vec
+}
+
+define <2 x i64> @vector_ptrtointbitcast({<1 x i32*>, <1 x i32*>} %x) {
+; CHECK-LABEL: @vector_ptrtointbitcast
+  %a = alloca {<1 x i32*>, <1 x i32*>}
+; CHECK-NOT: alloca
+
+  store {<1 x i32*>, <1 x i32*>} %x, {<1 x i32*>, <1 x i32*>}* %a
+; CHECK-NOT: store
+
+  %cast = bitcast {<1 x i32*>, <1 x i32*>}* %a to <2 x i64>*
+  %vec = load <2 x i64>, <2 x i64>* %cast
+; CHECK-NOT: load
+; CHECK: ptrtoint
+; CHECK: bitcast
+; CHECK: ptrtoint
+; CHECK: bitcast
+
+  ret <2 x i64> %vec
+}

Added: llvm/trunk/test/Transforms/SROA/vector-lifetime-intrinsic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/vector-lifetime-intrinsic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/vector-lifetime-intrinsic.ll (added)
+++ llvm/trunk/test/Transforms/SROA/vector-lifetime-intrinsic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt -sroa -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:32-i64:32-v32:32-n32-S64"
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
+; The <4 x float> alloca is stored whole and reloaded as <3 x float> between lifetime markers; no alloca may remain.
+; CHECK-LABEL: @wombat(
+; CHECK-NOT: alloca
+; CHECK: ret void
+define void @wombat(<4 x float> %arg1) {
+bb:
+  %tmp = alloca <4 x float>, align 16
+  %tmp8 = bitcast <4 x float>* %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp8)
+  store <4 x float> %arg1, <4 x float>* %tmp, align 16
+  %tmp17 = bitcast <4 x float>* %tmp to <3 x float>*
+  %tmp18 = load <3 x float>, <3 x float>* %tmp17
+  %tmp20 = bitcast <4 x float>* %tmp to i8*
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* %tmp20)
+  call void @wombat3(<3 x float> %tmp18)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @wombat3(<3 x float>) #0
+
+attributes #0 = { nounwind }

Added: llvm/trunk/test/Transforms/SROA/vector-promotion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/vector-promotion.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/vector-promotion.ll (added)
+++ llvm/trunk/test/Transforms/SROA/vector-promotion.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,625 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+%S1 = type { i64, [42 x float] }
+
+define i32 @test1(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test1(
+entry:
+	%a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
+  store <4 x i32> %x, <4 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
+  store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+  %tmp1 = load i32, i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+  %tmp2 = load i32, i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+  %tmp3 = load i32, i32* %a.tmp3
+; CHECK-NOT: load
+; CHECK:      extractelement <4 x i32> %x, i32 2
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test2(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test2(
+entry:
+	%a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
+  store <4 x i32> %x, <4 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
+  store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+  %tmp1 = load i32, i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+  %tmp2 = load i32, i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+  %a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>*
+  %tmp3.vec = load <2 x i32>, <2 x i32>* %a.tmp3.cast
+  %tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0
+; CHECK-NOT: load
+; CHECK:      %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2
+; CHECK-NEXT: %[[extract2:.*]] = extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: %[[extract4:.*]] = extractelement <2 x i32> %[[extract3]], i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: %[[sum1:.*]] = add i32 %[[extract1]], %[[extract2]]
+; CHECK-NEXT: %[[sum2:.*]] = add i32 %[[extract4]], %[[sum1]]
+; CHECK-NEXT: ret i32 %[[sum2]]
+}
+
+define i32 @test3(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test3(
+entry:
+	%a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
+  store <4 x i32> %x, <4 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
+  store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i1 false)
+; CHECK-NOT: memset
+
+  %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+  %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i1 false)
+  %tmp1 = load i32, i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+  %tmp2 = load i32, i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+  %tmp3 = load i32, i32* %a.tmp3
+; CHECK-NOT: load
+; CHECK:      %[[insert:.*]] = insertelement <4 x i32> %x, i32 -1, i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 3
+; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test4(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
+; CHECK-LABEL: @test4(
+entry:
+	%a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
+  store <4 x i32> %x, <4 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
+  store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+  %z.cast = bitcast <4 x i32>* %z to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i1 false)
+; CHECK-NOT: memcpy
+
+  %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+  %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+  %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
+  %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i1 false)
+  %tmp1 = load i32, i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+  %tmp2 = load i32, i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+  %tmp3 = load i32, i32* %a.tmp3
+; CHECK-NOT: memcpy
+; CHECK:      %[[load:.*]] = load <4 x i32>, <4 x i32>* %z
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
+; CHECK-NEXT: %[[element_load:.*]] = load i32, i32* %[[gep]]
+; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) nounwind
+
+; Same as test4 with a different sized address space pointer source.
+define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) {
+; CHECK-LABEL: @test4_as1(
+entry:
+	%a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
+  store <4 x i32> %x, <4 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
+  store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+  %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i1 false)
+; CHECK-NOT: memcpy
+
+  %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+  %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+  %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i16 0, i16 2
+  %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i1 false)
+  %tmp1 = load i32, i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+  %tmp2 = load i32, i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+  %tmp3 = load i32, i32* %a.tmp3
+; CHECK-NOT: memcpy
+; CHECK:      %[[load:.*]] = load <4 x i32>, <4 x i32> addrspace(1)* %z
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i64 0, i64 2
+; CHECK-NEXT: %[[element_load:.*]] = load i32, i32 addrspace(1)* %[[gep]]
+; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
+; CHECK-LABEL: @test5(
+; The same as the above, but with reversed source and destination for the
+; element memcpy, and a self copy.
+entry:
+	%a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0
+  store <4 x i32> %x, <4 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1
+  store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+  %a.x.cast = bitcast <4 x i32>* %a.x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i1 false)
+; CHECK-NOT: memcpy
+
+  %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+  %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+  %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
+  %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i1 false)
+  %tmp1 = load i32, i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+  %tmp2 = load i32, i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+  %tmp3 = load i32, i32* %a.tmp3
+; CHECK-NOT: memcpy
+; CHECK:      %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
+; CHECK-NEXT: %[[extract:.*]] = extractelement <4 x i32> %y, i32 2
+; CHECK-NEXT: store i32 %[[extract]], i32* %[[gep]]
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 2
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
+
+define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
+; CHECK-LABEL: @test6(
+; The old scalarrepl pass would wrongly drop the store to the second alloca.
+; PR13254
+  %tmp = alloca { <4 x i64>, <4 x i64> }
+  %p0 = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0
+  store <4 x i64> %x, <4 x i64>* %p0
+; CHECK: store <4 x i64> %x,
+  %p1 = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 1
+  store <4 x i64> %y, <4 x i64>* %p1
+; CHECK: store <4 x i64> %y,
+  %addr = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0, i64 %n
+  %res = load i64, i64* %addr, align 4
+  ret i64 %res
+}
+
+define <4 x i32> @test_subvec_store() {
+; CHECK-LABEL: @test_subvec_store(
+entry:
+  %a = alloca <4 x i32>
+; CHECK-NOT: alloca
+
+  %a.gep0 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 0
+  %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
+  store <2 x i32> <i32 0, i32 0>, <2 x i32>* %a.cast0
+; CHECK-NOT: store
+; CHECK:     select <4 x i1> <i1 true, i1 true, i1 false, i1 false> 
+
+  %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1
+  %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
+  store <2 x i32> <i32 1, i32 1>, <2 x i32>* %a.cast1
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
+
+  %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 2
+  %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
+  store <2 x i32> <i32 2, i32 2>, <2 x i32>* %a.cast2
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
+
+  %a.gep3 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 3
+  store i32 3, i32* %a.gep3
+; CHECK-NEXT: insertelement <4 x i32>
+
+  %ret = load <4 x i32>, <4 x i32>* %a
+
+  ret <4 x i32> %ret
+; CHECK-NEXT: ret <4 x i32> 
+}
+
+define <4 x i32> @test_subvec_load() {
+; CHECK-LABEL: @test_subvec_load(
+entry:
+  %a = alloca <4 x i32>
+; CHECK-NOT: alloca
+  store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a
+; CHECK-NOT: store
+
+  %a.gep0 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 0
+  %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
+  %first = load <2 x i32>, <2 x i32>* %a.cast0
+; CHECK-NOT: load
+; CHECK:      %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+
+  %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1
+  %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
+  %second = load <2 x i32>, <2 x i32>* %a.cast1
+; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 1, i32 2>
+
+  %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 2
+  %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
+  %third = load <2 x i32>, <2 x i32>* %a.cast2
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+  %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> <i32 0, i32 2>
+  %ret = shufflevector <2 x i32> %tmp, <2 x i32> %third, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: %[[tmp:.*]] = shufflevector <2 x i32> %[[extract1]], <2 x i32> %[[extract2]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: %[[ret:.*]] = shufflevector <2 x i32> %[[tmp]], <2 x i32> %[[extract3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+  ret <4 x i32> %ret
+; CHECK-NEXT: ret <4 x i32> %[[ret]]
+}
+
+declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i1) nounwind
+
+define <4 x float> @test_subvec_memset() {
+; CHECK-LABEL: @test_subvec_memset(
+entry:
+  %a = alloca <4 x float>
+; CHECK-NOT: alloca
+
+  %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0
+  %a.cast0 = bitcast float* %a.gep0 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i1 false)
+; CHECK-NOT: store
+; CHECK: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
+
+  %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1
+  %a.cast1 = bitcast float* %a.gep1 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i1 false)
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
+
+  %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
+  %a.cast2 = bitcast float* %a.gep2 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i1 false)
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
+
+  %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
+  %a.cast3 = bitcast float* %a.gep3 to i8*
+  call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i1 false)
+; CHECK-NEXT: insertelement <4 x float> 
+
+  %ret = load <4 x float>, <4 x float>* %a
+
+  ret <4 x float> %ret
+; CHECK-NEXT: ret <4 x float> 
+}
+
+define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) {
+; CHECK-LABEL: @test_subvec_memcpy(
+entry:
+  %a = alloca <4 x float>
+; CHECK-NOT: alloca
+
+  %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0
+  %a.cast0 = bitcast float* %a.gep0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i1 false)
+; CHECK:      %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
+; CHECK-NEXT: %[[x:.*]] = load <2 x float>, <2 x float>* %[[xptr]]
+; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>  
+
+  %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1
+  %a.cast1 = bitcast float* %a.gep1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i1 false)
+; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
+; CHECK-NEXT: %[[y:.*]] = load <2 x float>, <2 x float>* %[[yptr]]
+; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
+
+  %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
+  %a.cast2 = bitcast float* %a.gep2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i1 false)
+; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
+; CHECK-NEXT: %[[z:.*]] = load <2 x float>, <2 x float>* %[[zptr]]
+; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
+
+  %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
+  %a.cast3 = bitcast float* %a.gep3 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i1 false)
+; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
+; CHECK-NEXT: %[[f:.*]] = load float, float* %[[fptr]]
+; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> 
+
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i1 false)
+; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
+; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
+
+  %ret = load <4 x float>, <4 x float>* %a
+
+  ret <4 x float> %ret
+; CHECK-NEXT: ret <4 x float> %[[insert_f]]
+}
+
+define i32 @PR14212() {
+; CHECK-LABEL: @PR14212(
+; This caused a crash when "splitting" the load of the i32 in order to promote
+; the store of <3 x i8> properly. Heavily reduced from an OpenCL test case.
+entry:
+  %retval = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+  store <3 x i8> undef, <3 x i8>* %retval, align 4
+  %cast = bitcast <3 x i8>* %retval to i32*
+  %load = load i32, i32* %cast, align 4
+  ret i32 %load
+; CHECK: ret i32
+}
+
+define <2 x i8> @PR14349.1(i32 %x) {
+; CHECK-LABEL: @PR14349.1(
+; The first testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split store of an integer to a store of a vector.
+entry:
+  %a = alloca i32
+; CHECK-NOT: alloca
+
+  store i32 %x, i32* %a
+; CHECK-NOT: store
+
+  %cast = bitcast i32* %a to <2 x i8>*
+  %vec = load <2 x i8>, <2 x i8>* %cast
+; CHECK-NOT: load
+
+  ret <2 x i8> %vec
+; CHECK: %[[trunc:.*]] = trunc i32 %x to i16
+; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8>
+; CHECK: ret <2 x i8> %[[cast]]
+}
+
+define i32 @PR14349.2(<2 x i8> %x) {
+; CHECK-LABEL: @PR14349.2(
+; The second testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split load of an integer to a load of a vector.
+entry:
+  %a = alloca i32
+; CHECK-NOT: alloca
+
+  %cast = bitcast i32* %a to <2 x i8>*
+  store <2 x i8> %x, <2 x i8>* %cast
+; CHECK-NOT: store
+
+  %int = load i32, i32* %a
+; CHECK-NOT: load
+
+  ret i32 %int
+; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16
+; CHECK: %[[zext:.*]] = zext i16 %[[cast]] to i32
+; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[zext]]
+; CHECK: ret i32 %[[insert]]
+}
+
+define i32 @test7(<2 x i32> %x, <2 x i32> %y) {
+; Test that we can promote to vectors when the alloca doesn't mention any vector types.
+; CHECK-LABEL: @test7(
+entry:
+	%a = alloca [2 x i64]
+  %a.cast = bitcast [2 x i64]* %a to [2 x <2 x i32>]*
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 0
+  store <2 x i32> %x, <2 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1
+  store <2 x i32> %y, <2 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.tmp1 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 0, i64 1
+  %tmp1 = load i32, i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 1
+  %tmp2 = load i32, i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 0
+  %tmp3 = load i32, i32* %a.tmp3
+; CHECK-NOT: load
+; CHECK:      extractelement <2 x i32> %x, i32 1
+; CHECK-NEXT: extractelement <2 x i32> %y, i32 1
+; CHECK-NEXT: extractelement <2 x i32> %y, i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test8(<2 x i32> %x) {
+; Ensure that we can promote an alloca that doesn't mention a vector type based
+; on a single store with a vector type.
+; CHECK-LABEL: @test8(
+entry:
+	%a = alloca i64
+  %a.vec = bitcast i64* %a to <2 x i32>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store <2 x i32> %x, <2 x i32>* %a.vec
+; CHECK-NOT: store
+
+  %tmp1 = load i32, i32* %a.i32
+  %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1
+  %tmp2 = load i32, i32* %a.tmp2
+; CHECK-NOT: load
+; CHECK:      extractelement <2 x i32> %x, i32 0
+; CHECK-NEXT: extractelement <2 x i32> %x, i32 1
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  ret i32 %tmp4
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define <2 x i32> @test9(i32 %x, i32 %y) {
+; Ensure that we can promote an alloca that doesn't mention a vector type based
+; on a single load with a vector type.
+; CHECK-LABEL: @test9(
+entry:
+	%a = alloca i64
+  %a.vec = bitcast i64* %a to <2 x i32>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store i32 %x, i32* %a.i32
+  %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1
+  store i32 %y, i32* %a.tmp2
+; CHECK-NOT: store
+; CHECK:      %[[V1:.*]] = insertelement <2 x i32> undef, i32 %x, i32 0
+; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
+
+  %result = load <2 x i32>, <2 x i32>* %a.vec
+; CHECK-NOT:  load
+
+  ret <2 x i32> %result
+; CHECK-NEXT: ret <2 x i32> %[[V2]]
+}
+
+define <2 x i32> @test10(<4 x i16> %x, i32 %y) {
+; If there are multiple different vector types used, we should select the one
+; with the widest elements.
+; CHECK-LABEL: @test10(
+entry:
+	%a = alloca i64
+  %a.vec1 = bitcast i64* %a to <2 x i32>*
+  %a.vec2 = bitcast i64* %a to <4 x i16>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store <4 x i16> %x, <4 x i16>* %a.vec2
+  %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1
+  store i32 %y, i32* %a.tmp2
+; CHECK-NOT: store
+; CHECK:      %[[V1:.*]] = bitcast <4 x i16> %x to <2 x i32>
+; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
+
+  %result = load <2 x i32>, <2 x i32>* %a.vec1
+; CHECK-NOT:  load
+
+  ret <2 x i32> %result
+; CHECK-NEXT: ret <2 x i32> %[[V2]]
+}
+
+define <2 x float> @test11(<4 x i16> %x, i32 %y) {
+; If there are multiple different element types for different vector types,
+; pick the integer types. This isn't really important, but seems like the best
+; heuristic for making a deterministic decision.
+; CHECK-LABEL: @test11(
+entry:
+	%a = alloca i64
+  %a.vec1 = bitcast i64* %a to <2 x float>*
+  %a.vec2 = bitcast i64* %a to <4 x i16>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store <4 x i16> %x, <4 x i16>* %a.vec2
+  %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1
+  store i32 %y, i32* %a.tmp2
+; CHECK-NOT: store
+; CHECK:      %[[V1:.*]] = bitcast i32 %y to <2 x i16>
+; CHECK-NEXT: %[[V2:.*]] = shufflevector <2 x i16> %[[V1]], <2 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT: %[[V3:.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i16> %[[V2]], <4 x i16> %x
+; CHECK-NEXT: %[[V4:.*]] = bitcast <4 x i16> %[[V3]] to <2 x float>
+
+  %result = load <2 x float>, <2 x float>* %a.vec1
+; CHECK-NOT:  load
+
+  ret <2 x float> %result
+; CHECK-NEXT: ret <2 x float> %[[V4]]
+}
+
+define <4 x float> @test12() {
+; CHECK-LABEL: @test12(
+  %a = alloca <3 x i32>, align 16
+; CHECK-NOT: alloca
+
+  %cast1 = bitcast <3 x i32>* %a to <4 x i32>*
+  store <4 x i32> undef, <4 x i32>* %cast1, align 16
+; CHECK-NOT: store
+
+  %cast2 = bitcast <3 x i32>* %a to <3 x float>*
+  %cast3 = bitcast <3 x float>* %cast2 to <4 x float>*
+  %vec = load <4 x float>, <4 x float>* %cast3
+; CHECK-NOT: load
+
+; CHECK:      %[[ret:.*]] = bitcast <4 x i32> undef to <4 x float>
+; CHECK-NEXT: ret <4 x float> %[[ret]]
+  ret <4 x float> %vec
+}

Added: llvm/trunk/test/Transforms/SROA/vectors-of-pointers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SROA/vectors-of-pointers.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SROA/vectors-of-pointers.ll (added)
+++ llvm/trunk/test/Transforms/SROA/vectors-of-pointers.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt < %s -sroa -disable-output
+
+; Make sure we don't crash on this one.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @foo() {
+entry:
+  %Args.i = alloca <2 x i32*>, align 16
+  br i1 undef, label %bb0.exit158, label %if.then.i.i.i.i.i138
+
+if.then.i.i.i.i.i138:
+  unreachable
+
+bb0.exit158:
+  br i1 undef, label %bb0.exit257, label %if.then.i.i.i.i.i237
+
+if.then.i.i.i.i.i237:
+  unreachable
+
+bb0.exit257:
+  %0 = load <2 x i32*>, <2 x i32*>* %Args.i, align 16
+  unreachable
+}

Added: llvm/trunk/test/Transforms/SafeStack/AArch64/abi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/AArch64/abi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/AArch64/abi.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/AArch64/abi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt -safe-stack -S -mtriple=aarch64-linux-android < %s -o - | FileCheck %s
+
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; CHECK: %[[TP:.*]] = call i8* @llvm.thread.pointer()
+; CHECK: %[[SPA0:.*]] = getelementptr i8, i8* %[[TP]], i32 72
+; CHECK: %[[SPA:.*]] = bitcast i8* %[[SPA0]] to i8**
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: store i8* %[[USST]], i8** %[[SPA]]
+
+  %a = alloca i8, align 8
+  call void @Capture(i8* %a)
+
+; CHECK: store i8* %[[USP]], i8** %[[SPA]]
+  ret void
+}
+
+declare void @Capture(i8*)

Added: llvm/trunk/test/Transforms/SafeStack/AArch64/abi_ssp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/AArch64/abi_ssp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/AArch64/abi_ssp.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/AArch64/abi_ssp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -safe-stack -S -mtriple=aarch64-linux-android < %s -o - | FileCheck --check-prefixes=TLS,ANDROID %s
+; RUN: opt -safe-stack -S -mtriple=aarch64-unknown-fuchsia < %s -o - | FileCheck --check-prefixes=TLS,FUCHSIA %s
+
+define void @foo() nounwind uwtable safestack sspreq {
+entry:
+; The first @llvm.thread.pointer is for the unsafe stack pointer, skip it.
+; TLS: call i8* @llvm.thread.pointer()
+
+; TLS: %[[TP2:.*]] = call i8* @llvm.thread.pointer()
+; ANDROID: %[[B:.*]] = getelementptr i8, i8* %[[TP2]], i32 40
+; FUCHSIA: %[[B:.*]] = getelementptr i8, i8* %[[TP2]], i32 -16
+; TLS: %[[C:.*]] = bitcast i8* %[[B]] to i8**
+; TLS: %[[StackGuard:.*]] = load i8*, i8** %[[C]]
+; TLS: store i8* %[[StackGuard]], i8** %[[StackGuardSlot:.*]]
+  %a = alloca i128, align 16
+  call void @Capture(i128* %a)
+
+; TLS: %[[A:.*]] = load i8*, i8** %[[StackGuardSlot]]
+; TLS: icmp ne i8* %[[StackGuard]], %[[A]]
+  ret void
+}
+
+declare void @Capture(i128*)

Added: llvm/trunk/test/Transforms/SafeStack/AArch64/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/AArch64/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/AArch64/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/SafeStack/AArch64/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/SafeStack/AArch64/unreachable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/AArch64/unreachable.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/AArch64/unreachable.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/AArch64/unreachable.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -safe-stack -safe-stack-coloring -S -mtriple=aarch64-linux-android < %s -o - | FileCheck %s
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; CHECK: %[[TP:.*]] = call i8* @llvm.thread.pointer()
+; CHECK: %[[SPA0:.*]] = getelementptr i8, i8* %[[TP]], i32 72
+; CHECK: %[[SPA:.*]] = bitcast i8* %[[SPA0]] to i8**
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: store i8* %[[USST]], i8** %[[SPA]]
+
+  %a = alloca i8, align 8
+  br label %ret
+
+ret:
+  ret void
+
+dead:
+  call void @Capture(i8* %a)
+  br label %ret
+}
+
+declare void @Capture(i8*)

Added: llvm/trunk/test/Transforms/SafeStack/ARM/abi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/ARM/abi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/ARM/abi.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/ARM/abi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt -safe-stack -S -mtriple=arm-linux-android < %s -o - | FileCheck %s
+
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; CHECK: %[[SPA:.*]] = call i8** @__safestack_pointer_address()
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: store i8* %[[USST]], i8** %[[SPA]]
+
+  %a = alloca i8, align 8
+  call void @Capture(i8* %a)
+
+; CHECK: store i8* %[[USP]], i8** %[[SPA]]
+  ret void
+}
+
+declare void @Capture(i8*)

Added: llvm/trunk/test/Transforms/SafeStack/ARM/debug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/ARM/debug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/ARM/debug.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/ARM/debug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,98 @@
+; RUN: opt -safe-stack -safestack-use-pointer-address < %s -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7-pc-linux-android"
+
+; Original C used to generate debug info:
+; char*** addr;
+; char** __safestack_pointer_address() {
+;   return *addr;
+; }
+; void Capture(char*x);
+; void f() { char c[16]; Capture(c); }
+
+; CHECK: !35 = !DILocation(line: 3, column: 11, scope: !17, inlinedAt: !36)
+; CHECK: !36 = distinct !DILocation(line: 6, scope: !27)
+
+@addr = common local_unnamed_addr global i8*** null, align 4, !dbg !0
+
+; Function Attrs: norecurse nounwind readonly safestack
+define i8** @__safestack_pointer_address() local_unnamed_addr #0 !dbg !17 {
+entry:
+  %0 = load i8***, i8**** @addr, align 4, !dbg !20, !tbaa !21
+  %1 = load i8**, i8*** %0, align 4, !dbg !25, !tbaa !21
+  ret i8** %1, !dbg !26
+}
+
+; Function Attrs: nounwind safestack
+define void @f() local_unnamed_addr #1 !dbg !27 {
+entry:
+  %c = alloca [16 x i8], align 1
+  %0 = getelementptr inbounds [16 x i8], [16 x i8]* %c, i32 0, i32 0, !dbg !35
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) #5, !dbg !35
+  call void @llvm.dbg.declare(metadata [16 x i8]* %c, metadata !31, metadata !DIExpression()), !dbg !36
+  call void @Capture(i8* nonnull %0) #5, !dbg !37
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) #5, !dbg !38
+  ret void, !dbg !38
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #3
+
+declare void @Capture(i8*) local_unnamed_addr #4
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
+
+attributes #0 = { norecurse nounwind readonly safestack "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind safestack "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { nounwind readnone speculatable }
+attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { nounwind }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!11, !12, !13, !14, !15}
+!llvm.ident = !{!16}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "addr", scope: !2, file: !6, line: 1, type: !7, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5)
+!3 = !DIFile(filename: "-", directory: "/")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIFile(filename: "<stdin>", directory: "/")
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 32)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 32)
+!10 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_unsigned_char)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{i32 1, !"wchar_size", i32 4}
+!14 = !{i32 1, !"min_enum_size", i32 4}
+!15 = !{i32 7, !"PIC Level", i32 1}
+!16 = !{!"clang"}
+!17 = distinct !DISubprogram(name: "__safestack_pointer_address", scope: !6, file: !6, line: 2, type: !18, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: true, unit: !2, retainedNodes: !4)
+!18 = !DISubroutineType(types: !19)
+!19 = !{!8}
+!20 = !DILocation(line: 3, column: 11, scope: !17)
+!21 = !{!22, !22, i64 0}
+!22 = !{!"any pointer", !23, i64 0}
+!23 = !{!"omnipotent char", !24, i64 0}
+!24 = !{!"Simple C/C++ TBAA"}
+!25 = !DILocation(line: 3, column: 10, scope: !17)
+!26 = !DILocation(line: 3, column: 3, scope: !17)
+!27 = distinct !DISubprogram(name: "f", scope: !6, file: !6, line: 6, type: !28, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: true, unit: !2, retainedNodes: !30)
+!28 = !DISubroutineType(types: !29)
+!29 = !{null}
+!30 = !{!31}
+!31 = !DILocalVariable(name: "c", scope: !27, file: !6, line: 6, type: !32)
+!32 = !DICompositeType(tag: DW_TAG_array_type, baseType: !10, size: 128, elements: !33)
+!33 = !{!34}
+!34 = !DISubrange(count: 16)
+!35 = !DILocation(line: 6, column: 12, scope: !27)
+!36 = !DILocation(line: 6, column: 17, scope: !27)
+!37 = !DILocation(line: 6, column: 24, scope: !27)
+!38 = !DILocation(line: 6, column: 36, scope: !27)

Added: llvm/trunk/test/Transforms/SafeStack/ARM/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/ARM/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/ARM/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/SafeStack/ARM/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/SafeStack/ARM/setjmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/ARM/setjmp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/ARM/setjmp.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/ARM/setjmp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; Test stack pointer restore after setjmp() with the function-call safestack ABI.
+; RUN: opt -safe-stack -S -mtriple=arm-linux-androideabi < %s -o - | FileCheck %s
+
+@env = global [64 x i32] zeroinitializer, align 4
+
+define void @f(i32 %b) safestack {
+entry:
+; CHECK: %[[SPA:.*]] = call i8** @__safestack_pointer_address()
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: %[[USDP:.*]] = alloca i8*
+; CHECK: store i8* %[[USP]], i8** %[[USDP]]
+; CHECK: call i32 @setjmp
+
+  %call = call i32 @setjmp(i32* getelementptr inbounds ([64 x i32], [64 x i32]* @env, i32 0, i32 0)) returns_twice
+
+; CHECK: %[[USP2:.*]] = load i8*, i8** %[[USDP]]
+; CHECK: store i8* %[[USP2]], i8** %[[SPA]]
+
+  %tobool = icmp eq i32 %b, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+  %0 = alloca [42 x i8], align 1
+  %.sub = getelementptr inbounds [42 x i8], [42 x i8]* %0, i32 0, i32 0
+  call void @_Z7CapturePv(i8* %.sub)
+  br label %if.end
+
+if.end:
+; CHECK: store i8* %[[USP:.*]], i8** %[[SPA:.*]]
+
+  ret void
+}
+
+declare i32 @setjmp(i32*) returns_twice
+
+declare void @_Z7CapturePv(i8*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/abi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/abi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/abi.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/abi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s --check-prefix=TLS
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s --check-prefix=TLS
+; RUN: opt -safe-stack -S -mtriple=i686-linux-android < %s -o - | FileCheck %s --check-prefix=DIRECT-TLS32
+; RUN: opt -safe-stack -S -mtriple=x86_64-linux-android < %s -o - | FileCheck %s --check-prefix=DIRECT-TLS64
+
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; TLS: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; TLS: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; TLS: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
+
+; DIRECT-TLS32: %[[USP:.*]] = load i8*, i8* addrspace(256)* inttoptr (i32 36 to i8* addrspace(256)*)
+; DIRECT-TLS32: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; DIRECT-TLS32: store i8* %[[USST]], i8* addrspace(256)* inttoptr (i32 36 to i8* addrspace(256)*)
+
+; DIRECT-TLS64: %[[USP:.*]] = load i8*, i8* addrspace(257)* inttoptr (i32 72 to i8* addrspace(257)*)
+; DIRECT-TLS64: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; DIRECT-TLS64: store i8* %[[USST]], i8* addrspace(257)* inttoptr (i32 72 to i8* addrspace(257)*)
+
+  %a = alloca i8, align 8
+  call void @Capture(i8* %a)
+
+; TLS: store i8* %[[USP]], i8** @__safestack_unsafe_stack_ptr
+; DIRECT-TLS32: store i8* %[[USP]], i8* addrspace(256)* inttoptr (i32 36 to i8* addrspace(256)*)
+; DIRECT-TLS64: store i8* %[[USP]], i8* addrspace(257)* inttoptr (i32 72 to i8* addrspace(257)*)
+  ret void
+}
+
+declare void @Capture(i8*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/abi_ssp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/abi_ssp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/abi_ssp.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/abi_ssp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt -safe-stack -S -mtriple=i686-pc-linux-gnu < %s -o - | FileCheck --check-prefixes=COMMON,TLS32 %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefixes=COMMON,TLS64 %s
+
+; RUN: opt -safe-stack -S -mtriple=i686-linux-android < %s -o - | FileCheck --check-prefixes=COMMON,GLOBAL32 %s
+; RUN: opt -safe-stack -S -mtriple=i686-linux-android24 < %s -o - | FileCheck --check-prefixes=COMMON,TLS32 %s
+
+; RUN: opt -safe-stack -S -mtriple=x86_64-linux-android < %s -o - | FileCheck --check-prefixes=COMMON,TLS64 %s
+
+; RUN: opt -safe-stack -S -mtriple=x86_64-unknown-fuchsia < %s -o - | FileCheck --check-prefixes=COMMON,FUCHSIA64 %s
+
+define void @foo() safestack sspreq {
+entry:
+; TLS32: %[[StackGuard:.*]] = load i8*, i8* addrspace(256)* inttoptr (i32 20 to i8* addrspace(256)*)
+; TLS64: %[[StackGuard:.*]] = load i8*, i8* addrspace(257)* inttoptr (i32 40 to i8* addrspace(257)*)
+; FUCHSIA64: %[[StackGuard:.*]] = load i8*, i8* addrspace(257)* inttoptr (i32 16 to i8* addrspace(257)*)
+; GLOBAL32: %[[StackGuard:.*]] = load i8*, i8** @__stack_chk_guard
+; COMMON:   store i8* %[[StackGuard]], i8** %[[StackGuardSlot:.*]]
+  %a = alloca i8, align 1
+  call void @Capture(i8* %a)
+
+; COMMON: %[[A:.*]] = load i8*, i8** %[[StackGuardSlot]]
+; COMMON: icmp ne i8* %[[StackGuard]], %[[A]]
+  ret void
+}
+
+declare void @Capture(i8*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/addr-taken.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/addr-taken.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/addr-taken.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/addr-taken.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Address-of local taken (j = &a)
+; Requires protector.
+
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %retval = alloca i32, align 4
+  %a = alloca i32, align 4
+  %j = alloca i32*, align 8
+  store i32 0, i32* %retval
+  %0 = load i32, i32* %a, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %a, align 4
+  store i32* %a, i32** %j, align 8
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/SafeStack/X86/array-aligned.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/array-aligned.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/array-aligned.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/array-aligned.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; array of [16 x i8]
+
+define void @foo(i8* %a) nounwind uwtable safestack {
+entry:
+  ; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+
+  ; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+
+  ; CHECK: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
+
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [16 x i8], align 16
+
+  ; CHECK: %[[AADDR:.*]] = alloca i8*, align 8
+  ; CHECK: store i8* {{.*}}, i8** %[[AADDR]], align 8
+  store i8* %a, i8** %a.addr, align 8
+
+  ; CHECK: %[[BUFPTR:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+  ; CHECK: %[[BUFPTR2:.*]] = bitcast i8* %[[BUFPTR]] to [16 x i8]*
+  ; CHECK: %[[GEP:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[BUFPTR2]], i32 0, i32 0
+  %gep = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+
+  ; CHECK: %[[A2:.*]] = load i8*, i8** %[[AADDR]], align 8
+  %a2 = load i8*, i8** %a.addr, align 8
+
+  ; CHECK: call i8* @strcpy(i8* %[[GEP]], i8* %[[A2]])
+  %call = call i8* @strcpy(i8* %gep, i8* %a2)
+
+  ; CHECK: store i8* %[[USP]], i8** @__safestack_unsafe_stack_ptr
+  ret void
+}
+
+declare i8* @strcpy(i8*, i8*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/array.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/array.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/array.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/array.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,89 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=i386-pc-contiki-unknown < %s -o - | FileCheck -check-prefix=SINGLE-THREAD %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+; array [4 x i8]
+; Requires protector.
+
+; CHECK: @__safestack_unsafe_stack_ptr = external thread_local(initialexec) global i8*
+; SINGLE-THREAD: @__safestack_unsafe_stack_ptr = external global i8*
+
+define void @foo(i8* %a) nounwind uwtable safestack {
+entry:
+  ; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+
+  ; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+
+  ; CHECK: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
+
+  %a.addr = alloca i8*, align 8
+  %buf = alloca [4 x i8], align 1
+
+  ; CHECK: %[[AADDR:.*]] = alloca i8*, align 8
+  ; CHECK: store i8* {{.*}}, i8** %[[AADDR]], align 8
+  store i8* %a, i8** %a.addr, align 8
+
+  ; CHECK: %[[BUFPTR:.*]] = getelementptr i8, i8* %[[USP]], i32 -4
+  ; CHECK: %[[BUFPTR2:.*]] = bitcast i8* %[[BUFPTR]] to [4 x i8]*
+  ; CHECK: %[[GEP:.*]] = getelementptr inbounds [4 x i8], [4 x i8]* %[[BUFPTR2]], i32 0, i32 0
+  %gep = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+
+  ; CHECK: %[[A2:.*]] = load i8*, i8** %[[AADDR]], align 8
+  %a2 = load i8*, i8** %a.addr, align 8
+
+  ; CHECK: call i8* @strcpy(i8* %[[GEP]], i8* %[[A2]])
+  %call = call i8* @strcpy(i8* %gep, i8* %a2)
+
+  ; CHECK: store i8* %[[USP]], i8** @__safestack_unsafe_stack_ptr
+  ret void
+}
+
+; Load from an array at a fixed offset, no overflow.
+define i8 @StaticArrayFixedSafe() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: define i8 @StaticArrayFixedSafe(
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  ; CHECK: ret i8
+  %buf = alloca i8, i32 4, align 1
+  %gep = getelementptr inbounds i8, i8* %buf, i32 2
+  %x = load i8, i8* %gep, align 1
+  ret i8 %x
+}
+
+; Load from an array at a fixed offset with overflow.
+define i8 @StaticArrayFixedUnsafe() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: define i8 @StaticArrayFixedUnsafe(
+  ; CHECK: __safestack_unsafe_stack_ptr
+  ; CHECK: ret i8
+  %buf = alloca i8, i32 4, align 1
+  %gep = getelementptr inbounds i8, i8* %buf, i32 5
+  %x = load i8, i8* %gep, align 1
+  ret i8 %x
+}
+
+; Load from an array at an unknown offset.
+define i8 @StaticArrayVariableUnsafe(i32 %ofs) nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: define i8 @StaticArrayVariableUnsafe(
+  ; CHECK: __safestack_unsafe_stack_ptr
+  ; CHECK: ret i8
+  %buf = alloca i8, i32 4, align 1
+  %gep = getelementptr inbounds i8, i8* %buf, i32 %ofs
+  %x = load i8, i8* %gep, align 1
+  ret i8 %x
+}
+
+; Load from an array of an unknown size.
+define i8 @DynamicArrayUnsafe(i32 %sz) nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: define i8 @DynamicArrayUnsafe(
+  ; CHECK: __safestack_unsafe_stack_ptr
+  ; CHECK: ret i8
+  %buf = alloca i8, i32 %sz, align 1
+  %gep = getelementptr inbounds i8, i8* %buf, i32 2
+  %x = load i8, i8* %gep, align 1
+  ret i8 %x
+}
+
+declare i8* @strcpy(i8*, i8*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/byval.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/byval.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/byval.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/byval.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,68 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.S = type { [100 x i32] }
+
+; Safe access to a byval argument.
+define i32 @ByValSafe(%struct.S* byval nocapture readonly align 8 %zzz) norecurse nounwind readonly safestack uwtable {
+entry:
+  ; CHECK-LABEL: @ByValSafe
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  ; CHECK: ret i32
+  %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 3
+  %0 = load i32, i32* %arrayidx, align 4
+  ret i32 %0
+}
+
+; Unsafe access to a byval argument.
+; Argument is copied to the unsafe stack.
+define i32 @ByValUnsafe(%struct.S* byval nocapture readonly align 8 %zzz, i64 %idx) norecurse nounwind readonly safestack uwtable {
+entry:
+  ; CHECK-LABEL: @ByValUnsafe
+  ; CHECK: %[[A:.*]] = load {{.*}} @__safestack_unsafe_stack_ptr
+  ; CHECK: store {{.*}} @__safestack_unsafe_stack_ptr
+  ; CHECK: %[[B:.*]] = getelementptr i8, i8* %[[A]], i32 -400
+  ; CHECK: %[[C:.*]] = bitcast %struct.S* %zzz to i8*
+  ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[B]], i8* align 8 %[[C]], i64 400, i1 false)
+  ; CHECK: ret i32
+  %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 %idx
+  %0 = load i32, i32* %arrayidx, align 4
+  ret i32 %0
+}
+
+; Unsafe access to a byval argument.
+; Argument is copied to the unsafe stack.
+; Check that dest align of memcpy is set according to datalayout preferred alignment
+define i32 @ByValUnsafe2(%struct.S* byval nocapture readonly %zzz, i64 %idx) norecurse nounwind readonly safestack uwtable {
+entry:
+  ; CHECK-LABEL: @ByValUnsafe2
+  ; CHECK: %[[A:.*]] = load {{.*}} @__safestack_unsafe_stack_ptr
+  ; CHECK: store {{.*}} @__safestack_unsafe_stack_ptr
+  ; CHECK: %[[B:.*]] = getelementptr i8, i8* %[[A]], i32 -400
+  ; CHECK: %[[C:.*]] = bitcast %struct.S* %zzz to i8*
+  ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[B]], i8* %[[C]], i64 400, i1 false)
+  ; CHECK: ret i32
+  %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 %idx
+  %0 = load i32, i32* %arrayidx, align 4
+  ret i32 %0
+}
+
+; Highly aligned byval argument.
+define i32 @ByValUnsafeAligned(%struct.S* byval nocapture readonly align 64 %zzz, i64 %idx) norecurse nounwind readonly safestack uwtable {
+entry:
+  ; CHECK-LABEL: @ByValUnsafeAligned
+  ; CHECK: %[[A:.*]] = load {{.*}} @__safestack_unsafe_stack_ptr
+  ; CHECK: %[[B:.*]] = ptrtoint i8* %[[A]] to i64
+  ; CHECK: and i64 %[[B]], -64
+  ; CHECK: ret i32
+  %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 0
+  %0 = load i32, i32* %arrayidx, align 64
+  %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 %idx
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  ret i32 %add
+}
+

Added: llvm/trunk/test/Transforms/SafeStack/X86/call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/call.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,178 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; no arrays / no nested arrays
+; Requires no protector.
+
+define void @foo(i8* %a) nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: define void @foo(
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %a.addr = alloca i8*, align 8
+  store i8* %a, i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
+  ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @call_memset(i64 %len) safestack {
+entry:
+  ; CHECK-LABEL: define void @call_memset
+  ; CHECK: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+  call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 %len, i1 false)
+  ret void
+}
+
+define void @call_constant_memset() safestack {
+entry:
+  ; CHECK-LABEL: define void @call_constant_memset
+  ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 2
+  call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 7, i1 false)
+  ret void
+}
+
+define void @call_constant_overflow_memset() safestack {
+entry:
+  ; CHECK-LABEL: define void @call_constant_overflow_memset
+  ; CHECK: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 7
+  call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 5, i1 false)
+  ret void
+}
+
+define void @call_constant_underflow_memset() safestack {
+entry:
+  ; CHECK-LABEL: define void @call_constant_underflow_memset
+  ; CHECK: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr [10 x i8], [10 x i8]* %q, i32 0, i32 -1
+  call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 3, i1 false)
+  ret void
+}
+
+; Readnone nocapture -> safe
+define void @call_readnone(i64 %len) safestack {
+entry:
+  ; CHECK-LABEL: define void @call_readnone
+  ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+  call void @readnone(i8* %arraydecay)
+  ret void
+}
+
+; Arg0 is readnone, arg1 is not. Pass alloca ptr as arg0 -> safe
+define void @call_readnone0_0(i64 %len) safestack {
+entry:
+  ; CHECK-LABEL: define void @call_readnone0_0
+  ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+  call void @readnone0(i8* %arraydecay, i8* zeroinitializer)
+  ret void
+}
+
+; Arg0 is readnone, arg1 is not. Pass alloca ptr as arg1 -> unsafe
+define void @call_readnone0_1(i64 %len) safestack {
+entry:
+  ; CHECK-LABEL: define void @call_readnone0_1
+  ; CHECK: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+  call void @readnone0(i8 *zeroinitializer, i8* %arraydecay)
+  ret void
+}
+
+; Readonly nocapture -> unsafe
+define void @call_readonly(i64 %len) safestack {
+entry:
+  ; CHECK-LABEL: define void @call_readonly
+  ; CHECK: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+  call void @readonly(i8* %arraydecay)
+  ret void
+}
+
+; Readonly nocapture as parameter attributes (not a function attribute) -> unsafe
+define void @call_arg_readonly(i64 %len) safestack {
+entry:
+  ; CHECK-LABEL: define void @call_arg_readonly
+  ; CHECK: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+  call void @arg_readonly(i8* %arraydecay)
+  ret void
+}
+
+; Readwrite nocapture -> unsafe
+define void @call_readwrite(i64 %len) safestack {
+entry:
+  ; CHECK-LABEL: define void @call_readwrite
+  ; CHECK: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+  call void @readwrite(i8* %arraydecay)
+  ret void
+}
+
+; Captures the argument -> unsafe
+define void @call_capture(i64 %len) safestack {
+entry:
+  ; CHECK-LABEL: define void @call_capture
+  ; CHECK: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %q = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+  call void @capture(i8* %arraydecay)
+  ret void
+}
+
+; Lifetime intrinsics are always safe: %q is only passed to lifetime.start/end, so it must not be moved to the unsafe stack.
+define void @call_lifetime(i32* %p) safestack {
+  ; CHECK-LABEL: define void @call_lifetime
+  ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+entry:
+  %q = alloca [100 x i8], align 16
+  %0 = bitcast [100 x i8]* %q to i8*
+  call void @llvm.lifetime.start.p0i8(i64 100, i8* %0)
+  call void @llvm.lifetime.end.p0i8(i64 100, i8* %0)
+  ret void
+}
+
+declare void @readonly(i8* nocapture) readonly
+declare void @arg_readonly(i8* readonly nocapture)
+declare void @readwrite(i8* nocapture)
+declare void @capture(i8* readnone) readnone
+
+declare void @readnone(i8* nocapture) readnone
+declare void @readnone0(i8* nocapture readnone, i8* nocapture)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind argmemonly
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind argmemonly
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind argmemonly

Added: llvm/trunk/test/Transforms/SafeStack/X86/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/cast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/cast.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/cast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; PtrToInt/IntToPtr Cast
+
+define void @IntToPtr() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: @IntToPtr(
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %a = alloca i32, align 4
+  %0 = ptrtoint i32* %a to i64
+  %1 = inttoptr i64 %0 to i32*
+  ret void
+}
+
+define i8 @BitCastNarrow() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: @BitCastNarrow(
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  ; CHECK: ret i8
+  %a = alloca i32, align 4
+  %0 = bitcast i32* %a to i8*
+  %1 = load i8, i8* %0, align 1
+  ret i8 %1
+}
+
+define i64 @BitCastWide() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: @BitCastWide(
+  ; CHECK: __safestack_unsafe_stack_ptr
+  ; CHECK: ret i64
+  %a = alloca i32, align 4
+  %0 = bitcast i32* %a to i64*
+  %1 = load i64, i64* %0, align 1
+  ret i64 %1
+}

Added: llvm/trunk/test/Transforms/SafeStack/X86/coloring-ssp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/coloring-ssp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/coloring-ssp.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/coloring-ssp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt -safe-stack -safe-stack-coloring=1 -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+; %x and %y share a stack slot between them, but not with the stack guard.
+define void @f() safestack sspreq {
+; CHECK-LABEL: define void @f
+entry:
+; CHECK:  %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -16
+
+; CHECK:  %[[A:.*]] = getelementptr i8, i8* %[[USP]], i32 -8
+; CHECK:  %[[StackGuardSlot:.*]] = bitcast i8* %[[A]] to i8**
+; CHECK:  store i8* %{{.*}}, i8** %[[StackGuardSlot]]
+
+  %x = alloca i64, align 8
+  %y = alloca i64, align 8
+  %x0 = bitcast i64* %x to i8*
+  %y0 = bitcast i64* %y to i8*
+
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0)
+; CHECK:  getelementptr i8, i8* %[[USP]], i32 -16
+  call void @capture64(i64* %x)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0)
+; CHECK:  getelementptr i8, i8* %[[USP]], i32 -16
+  call void @capture64(i64* %y)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0)
+
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @capture64(i64*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/coloring.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/coloring.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/coloring.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/coloring.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt -safe-stack -safe-stack-coloring=1 -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -safe-stack-coloring=1 -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+define void @f() safestack {
+entry:
+; CHECK:  %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK:  %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+
+  %x = alloca i32, align 4
+  %x1 = alloca i32, align 4
+  %x2 = alloca i32, align 4
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %0)
+
+; CHECK:  %[[A1:.*]] = getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:  %[[A2:.*]] = bitcast i8* %[[A1]] to i32*
+; CHECK:  call void @capture(i32* nonnull %[[A2]])
+
+  call void @capture(i32* nonnull %x)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %0)
+  %1 = bitcast i32* %x1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %1)
+
+; CHECK:  %[[B1:.*]] = getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:  %[[B2:.*]] = bitcast i8* %[[B1]] to i32*
+; CHECK:  call void @capture(i32* nonnull %[[B2]])
+
+  call void @capture(i32* nonnull %x1)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %1)
+  %2 = bitcast i32* %x2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %2)
+
+; CHECK:  %[[C1:.*]] = getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:  %[[C2:.*]] = bitcast i8* %[[C1]] to i32*
+; CHECK:  call void @capture(i32* nonnull %[[C2]])
+
+  call void @capture(i32* nonnull %x2)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %2)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @capture(i32*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/coloring2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/coloring2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/coloring2.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/coloring2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,521 @@
+; RUN: opt -safe-stack -safe-stack-coloring=1 -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -safe-stack-coloring=1 -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+; x and y share the stack slot.
+define void @f() safestack {
+; CHECK-LABEL: define void @f
+entry:
+; CHECK:  %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -16
+
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %z = alloca i32, align 4
+  %x0 = bitcast i32* %x to i8*
+  %y0 = bitcast i32* %y to i8*
+  %z0 = bitcast i32* %z to i8*
+
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %z0)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+  call void @capture32(i32* %x)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+  call void @capture32(i32* %y)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -8
+  call void @capture32(i32* %z)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %z0)
+
+  ret void
+}
+
+define void @no_markers() safestack {
+; CHECK-LABEL: define void @no_markers(
+entry:
+; CHECK:  %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -16
+
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %x0 = bitcast i32* %x to i8*
+
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+  call void @capture32(i32* %x)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -8
+  call void @capture32(i32* %y)
+
+  ret void
+}
+
+; x and y can't share memory, but they can split z's storage.
+define void @g() safestack {
+; CHECK-LABEL: define void @g
+entry:
+; CHECK:  %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -16
+
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %z = alloca i64, align 4
+  %x0 = bitcast i32* %x to i8*
+  %y0 = bitcast i32* %y to i8*
+  %z0 = bitcast i64* %z to i8*
+
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+  call void @capture32(i32* %x)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -8
+  call void @capture32(i32* %y)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %z0)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -8
+  call void @capture64(i64* %z)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %z0)
+
+  ret void
+}
+
+; Both y and z fit in x's alignment gap.
+define void @h() safestack {
+; CHECK-LABEL: define void @h
+entry:
+; CHECK:  %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -16
+
+  %x = alloca i32, align 16
+  %z = alloca i64, align 4
+  %y = alloca i32, align 4
+  %x0 = bitcast i32* %x to i8*
+  %y0 = bitcast i32* %y to i8*
+  %z0 = bitcast i64* %z to i8*
+
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %z0)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -16
+  call void @capture32(i32* %x)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -12
+  call void @capture32(i32* %y)
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -8
+  call void @capture64(i64* %z)
+
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %z0)
+
+  ret void
+}
+
+; void f(bool a, bool b) {
+;   long x1, x2; capture64(&x1); capture64(&x2);
+;   if (a) {
+;     long y; capture64(&y);
+;     if (b) {
+;       long y1; capture64(&y1);
+;     } else {
+;       long y2; capture64(&y2);
+;     }
+;   } else {
+;     long z; capture64(&z);
+;     if (b) {
+;       long z1; capture64(&z1);
+;     } else {
+;       long z2; capture64(&z2);
+;     }
+;   }
+; }
+; Everything fits in 4 x 64-bit slots.
+define void @i(i1 zeroext %a, i1 zeroext %b) safestack {
+; CHECK-LABEL: define void @i
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -32
+  %x1 = alloca i64, align 8
+  %x2 = alloca i64, align 8
+  %y = alloca i64, align 8
+  %y1 = alloca i64, align 8
+  %y2 = alloca i64, align 8
+  %z = alloca i64, align 8
+  %z1 = alloca i64, align 8
+  %z2 = alloca i64, align 8
+  %0 = bitcast i64* %x1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0)
+  %1 = bitcast i64* %x2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %1)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -8
+; CHECK:   call void @capture64(
+  call void @capture64(i64* nonnull %x1)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK:   call void @capture64(
+  call void @capture64(i64* nonnull %x2)
+  br i1 %a, label %if.then, label %if.else4
+
+if.then:                                          ; preds = %entry
+  %2 = bitcast i64* %y to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %2)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -24
+; CHECK:   call void @capture64(
+  call void @capture64(i64* nonnull %y)
+  br i1 %b, label %if.then3, label %if.else
+
+if.then3:                                         ; preds = %if.then
+  %3 = bitcast i64* %y1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %3)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -32
+; CHECK:   call void @capture64(
+  call void @capture64(i64* nonnull %y1)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %3)
+  br label %if.end
+
+if.else:                                          ; preds = %if.then
+  %4 = bitcast i64* %y2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %4)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -32
+; CHECK:   call void @capture64(
+  call void @capture64(i64* nonnull %y2)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %4)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then3
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %2)
+  br label %if.end9
+
+if.else4:                                         ; preds = %entry
+  %5 = bitcast i64* %z to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %5)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -24
+; CHECK:   call void @capture64(
+  call void @capture64(i64* nonnull %z)
+  br i1 %b, label %if.then6, label %if.else7
+
+if.then6:                                         ; preds = %if.else4
+  %6 = bitcast i64* %z1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %6)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -32
+; CHECK:   call void @capture64(
+  call void @capture64(i64* nonnull %z1)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %6)
+  br label %if.end8
+
+if.else7:                                         ; preds = %if.else4
+  %7 = bitcast i64* %z2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %7)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -32
+; CHECK:   call void @capture64(
+  call void @capture64(i64* nonnull %z2)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %7)
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else7, %if.then6
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %5)
+  br label %if.end9
+
+if.end9:                                          ; preds = %if.end8, %if.end
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0)
+  ret void
+}
+
+; lifetime for x ends in 2 different BBs
+define void @no_merge1(i1 %d) safestack {
+; CHECK-LABEL: define void @no_merge1(
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -16
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %x0 = bitcast i32* %x to i8*
+  %y0 = bitcast i32* %y to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %x)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -8
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %y)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+  ret void
+bb3:
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+  ret void
+}
+
+define void @merge1(i1 %d) safestack {
+; CHECK-LABEL: define void @merge1(
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -16
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %x0 = bitcast i32* %x to i8*
+  %y0 = bitcast i32* %y to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %x)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %y)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %y0)
+  ret void
+bb3:
+  ret void
+}
+
+; Missing lifetime.end
+define void @merge2_noend(i1 %d) safestack {
+; CHECK-LABEL: define void @merge2_noend(
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -16
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %x0 = bitcast i32* %x to i8*
+  %y0 = bitcast i32* %y to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %x)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %y)
+  ret void
+bb3:
+  ret void
+}
+
+; Missing lifetime.end
+define void @merge3_noend(i1 %d) safestack {
+; CHECK-LABEL: define void @merge3_noend(
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -16
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %x0 = bitcast i32* %x to i8*
+  %y0 = bitcast i32* %y to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %x0)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %x)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %y)
+  ret void
+bb3:
+  ret void
+}
+
+; Missing lifetime.start
+define void @nomerge4_nostart(i1 %d) safestack {
+; CHECK-LABEL: define void @nomerge4_nostart(
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -16
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %x0 = bitcast i32* %x to i8*
+  %y0 = bitcast i32* %y to i8*
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %x)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %x0)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %y0)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -8
+; CHECK:   call void @capture32(
+  call void @capture32(i32* %y)
+  ret void
+bb3:
+  ret void
+}
+
+define void @array_merge() safestack {
+; CHECK-LABEL: define void @array_merge(
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -800
+  %A.i1 = alloca [100 x i32], align 4
+  %B.i2 = alloca [100 x i32], align 4
+  %A.i = alloca [100 x i32], align 4
+  %B.i = alloca [100 x i32], align 4
+  %0 = bitcast [100 x i32]* %A.i to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0)
+  %1 = bitcast [100 x i32]* %B.i to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %1)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -400
+; CHECK:   call void @capture100x32(
+  call void @capture100x32([100 x i32]* %A.i)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -800
+; CHECK:   call void @capture100x32(
+  call void @capture100x32([100 x i32]* %B.i)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1)
+  %2 = bitcast [100 x i32]* %A.i1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %2)
+  %3 = bitcast [100 x i32]* %B.i2 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %3)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -400
+; CHECK:   call void @capture100x32(
+  call void @capture100x32([100 x i32]* %A.i1)
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -800
+; CHECK:   call void @capture100x32(
+  call void @capture100x32([100 x i32]* %B.i2)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %2)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %3)
+  ret void
+}
+
+define void @myCall_pr15707() safestack {
+; CHECK-LABEL: define void @myCall_pr15707(
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -200000
+  %buf1 = alloca i8, i32 100000, align 16
+  %buf2 = alloca i8, i32 100000, align 16
+
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %buf1)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %buf1)
+
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %buf1)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %buf2)
+  call void @capture8(i8* %buf1)
+  call void @capture8(i8* %buf2)
+  ret void
+}
+
+; Check that we don't assert and crash even when there are allocas
+; outside the declared lifetime regions.
+define void @bad_range() safestack {
+; CHECK-LABEL: define void @bad_range(
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; A.i and B.i unsafe, not merged
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -800
+; A.i1 and B.i2 safe
+; CHECK: = alloca [100 x i32], align 4
+; CHECK: = alloca [100 x i32], align 4
+
+  %A.i1 = alloca [100 x i32], align 4
+  %B.i2 = alloca [100 x i32], align 4
+  %A.i = alloca [100 x i32], align 4
+  %B.i = alloca [100 x i32], align 4
+  %0 = bitcast [100 x i32]* %A.i to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0) nounwind
+  %1 = bitcast [100 x i32]* %B.i to i8*
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %1) nounwind
+  call void @capture100x32([100 x i32]* %A.i)
+  call void @capture100x32([100 x i32]* %B.i)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %0) nounwind
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1) nounwind
+  br label %block2
+
+block2:
+  ; I am used outside the marked lifetime.
+  call void @capture100x32([100 x i32]* %A.i)
+  call void @capture100x32([100 x i32]* %B.i)
+  ret void
+}
+
+%struct.Klass = type { i32, i32 }
+
+define i32 @shady_range(i32 %argc, i8** nocapture %argv) safestack {
+; CHECK-LABEL: define i32 @shady_range(
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -64
+  %a.i = alloca [4 x %struct.Klass], align 16
+  %b.i = alloca [4 x %struct.Klass], align 16
+  %a8 = bitcast [4 x %struct.Klass]* %a.i to i8*
+  %b8 = bitcast [4 x %struct.Klass]* %b.i to i8*
+  ; I am used outside the lifetime zone below:
+  %z2 = getelementptr inbounds [4 x %struct.Klass], [4 x %struct.Klass]* %a.i, i64 0, i64 0, i32 0
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %a8)
+  call void @llvm.lifetime.start.p0i8(i64 -1, i8* %b8)
+  call void @capture8(i8* %a8)
+  call void @capture8(i8* %b8)
+  %z3 = load i32, i32* %z2, align 16
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %a8)
+  call void @llvm.lifetime.end.p0i8(i64 -1, i8* %b8)
+  ret i32 %z3
+}
+
+define void @end_loop() safestack {
+; CHECK-LABEL: define void @end_loop()
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -16
+  %x = alloca i8, align 4
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) nounwind
+  br label %l2
+
+l2:
+  call void @capture8(i8* %x)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %x) nounwind
+  br label %l2
+}
+
+; Check that %x and %y get distinct stack slots => %x's lifetime does not break
+; when control re-enters l2.
+define void @start_loop() safestack {
+; CHECK-LABEL: define void @start_loop()
+entry:
+; CHECK:        %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:   getelementptr i8, i8* %[[USP]], i32 -16
+  %x = alloca i8, align 4
+  %y = alloca i8, align 4
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) nounwind
+  br label %l2
+
+l2:
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -8
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %y) nounwind
+  call void @capture8(i8* %y)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %y) nounwind
+
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -4
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %x) nounwind
+  call void @capture8(i8* %x)
+  br label %l2
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @capture8(i8*)
+declare void @capture32(i32*)
+declare void @capture64(i64*)
+declare void @capture100x32([100 x i32]*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/constant-gep-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/constant-gep-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/constant-gep-call.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/constant-gep-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.nest = type { %struct.pair, %struct.pair }
+%struct.pair = type { i32, i32 }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Nested structure, no arrays, no address-of expressions.
+; Verify that the resulting gep-of-gep does not incorrectly trigger
+; a safe stack protector.
+; safestack attribute
+; Requires no protector.
+; CHECK-LABEL: @foo(
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  %c = alloca %struct.nest, align 4
+  %b = getelementptr inbounds %struct.nest, %struct.nest* %c, i32 0, i32 1
+  %_a = getelementptr inbounds %struct.pair, %struct.pair* %b, i32 0, i32 0
+  %0 = load i32, i32* %_a, align 4
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0)
+  ret void
+}
+
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/constant-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/constant-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/constant-gep.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/constant-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%class.A = type { [2 x i8] }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; [2 x i8] in a class
+;  safestack attribute
+; Requires no protector.
+; CHECK-LABEL: @foo(
+define signext i8 @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  %a = alloca %class.A, align 1
+  %array = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8, i8* %arrayidx, align 1
+  ret i8 %0
+}

Added: llvm/trunk/test/Transforms/SafeStack/X86/constant-geps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/constant-geps.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/constant-geps.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/constant-geps.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.deep = type { %union.anon }
+%union.anon = type { %struct.anon }
+%struct.anon = type { %struct.anon.0 }
+%struct.anon.0 = type { %union.anon.1 }
+%union.anon.1 = type { [2 x i8] }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; [2 x i8] nested in several layers of structs and unions
+;  safestack attribute
+; Requires no protector.
+; CHECK-LABEL: @foo(
+define signext i8 @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  %x = alloca %struct.deep, align 1
+  %b = getelementptr inbounds %struct.deep, %struct.deep* %x, i32 0, i32 0
+  %c = bitcast %union.anon* %b to %struct.anon*
+  %d = getelementptr inbounds %struct.anon, %struct.anon* %c, i32 0, i32 0
+  %e = getelementptr inbounds %struct.anon.0, %struct.anon.0* %d, i32 0, i32 0
+  %array = bitcast %union.anon.1* %e to [2 x i8]*
+  %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+  %0 = load i8, i8* %arrayidx, align 1
+  ret i8 %0
+}

Added: llvm/trunk/test/Transforms/SafeStack/X86/debug-loc-dynamic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/debug-loc-dynamic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/debug-loc-dynamic.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/debug-loc-dynamic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+
+; Test llvm.dbg.value for dynamic allocas moved onto the unsafe stack.
+; In the dynamic alloca case, the dbg.value does not change with the exception
+; of the alloca pointer in the first argument being replaced with the new stack
+; top address.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i32 %n) safestack !dbg !6 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 %n, metadata !11, metadata !14), !dbg !15
+  %0 = zext i32 %n to i64, !dbg !16
+
+; CHECK:  store i8* %[[VLA:.*]], i8** @__safestack_unsafe_stack_ptr
+; CHECK:  tail call void @llvm.dbg.value(metadata i8* %[[VLA]], metadata ![[TYPE:.*]], metadata !DIExpression(DW_OP_deref))
+; CHECK:  call void @capture({{.*}} %[[VLA]])
+
+  %vla = alloca i8, i64 %0, align 16, !dbg !16
+  tail call void @llvm.dbg.value(metadata i8* %vla, metadata !12, metadata !17), !dbg !18
+  call void @capture(i8* nonnull %vla), !dbg !19
+  ret void, !dbg !20
+}
+
+declare void @capture(i8*)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 272832) (llvm/trunk 272831)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "../llvm/1.cc", directory: "/code/build-llvm")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.9.0 (trunk 272832) (llvm/trunk 272831)"}
+!6 = distinct !DISubprogram(name: "f", linkageName: "_Z1fi", scope: !1, file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !10)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null, !9}
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !{!11, !12}
+!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 2, type: !9)
+
+; CHECK-DAG: ![[TYPE]] = !DILocalVariable(name: "x",
+!12 = !DILocalVariable(name: "x", scope: !6, file: !1, line: 3, type: !13)
+!13 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!14 = !DIExpression()
+!15 = !DILocation(line: 2, column: 12, scope: !6)
+!16 = !DILocation(line: 3, column: 3, scope: !6)
+
+!17 = !DIExpression(DW_OP_deref)
+!18 = !DILocation(line: 3, column: 8, scope: !6)
+!19 = !DILocation(line: 4, column: 3, scope: !6)
+!20 = !DILocation(line: 5, column: 1, scope: !6)

Added: llvm/trunk/test/Transforms/SafeStack/X86/debug-loc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/debug-loc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/debug-loc.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/debug-loc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,80 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+
+; Test debug location for the local variables moved onto the unsafe stack.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.S = type { [100 x i8] }
+
+; Function Attrs: safestack uwtable
+define void @f(%struct.S* byval align 8 %zzz) #0 !dbg !12 {
+; CHECK: define void @f
+
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+
+  %xxx = alloca %struct.S, align 1
+  call void @llvm.dbg.declare(metadata %struct.S* %zzz, metadata !18, metadata !19), !dbg !20
+  call void @llvm.dbg.declare(metadata %struct.S* %xxx, metadata !21, metadata !19), !dbg !22
+
+; dbg.declare for %zzz and %xxx are gone; replaced with dbg.declare based off the unsafe stack pointer
+; CHECK-NOT: call void @llvm.dbg.declare
+; CHECK: call void @llvm.dbg.declare(metadata i8* %[[USP]], metadata ![[VAR_ARG:.*]], metadata !DIExpression(DW_OP_constu, 104, DW_OP_minus))
+; CHECK-NOT: call void @llvm.dbg.declare
+; CHECK: call void @llvm.dbg.declare(metadata i8* %[[USP]], metadata ![[VAR_LOCAL:.*]], metadata !DIExpression(DW_OP_constu, 208, DW_OP_minus))
+; CHECK-NOT: call void @llvm.dbg.declare
+
+  call void @Capture(%struct.S* %zzz), !dbg !23
+  call void @Capture(%struct.S* %xxx), !dbg !24
+
+; dbg.declare appears before the first use
+; CHECK:   call void @Capture
+; CHECK:   call void @Capture
+
+  ret void, !dbg !25
+}
+
+; CHECK-DAG: ![[VAR_ARG]] = !DILocalVariable(name: "zzz"
+; 100 aligned up to 8
+
+; CHECK-DAG: ![[VAR_LOCAL]] = !DILocalVariable(name: "xxx"
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @Capture(%struct.S*) #2
+
+attributes #0 = { safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16}
+!llvm.ident = !{!17}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 254019) (llvm/trunk 254036)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
+!1 = !DIFile(filename: "../llvm/2.cc", directory: "/code/build-llvm")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !1, line: 4, size: 800, align: 8, elements: !5, identifier: "_ZTS1S")
+!5 = !{!6}
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !4, file: !1, line: 5, baseType: !7, size: 800, align: 8)
+!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 800, align: 8, elements: !9)
+!8 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!9 = !{!10}
+!10 = !DISubrange(count: 100)
+!12 = distinct !DISubprogram(name: "f", linkageName: "_Z1f1S", scope: !1, file: !1, line: 10, type: !13, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!13 = !DISubroutineType(types: !14)
+!14 = !{null, !4}
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{!"clang version 3.8.0 (trunk 254019) (llvm/trunk 254036)"}
+!18 = !DILocalVariable(name: "zzz", arg: 1, scope: !12, file: !1, line: 10, type: !4)
+!19 = !DIExpression()
+!20 = !DILocation(line: 10, column: 10, scope: !12)
+!21 = !DILocalVariable(name: "xxx", scope: !12, file: !1, line: 11, type: !4)
+!22 = !DILocation(line: 11, column: 5, scope: !12)
+!23 = !DILocation(line: 12, column: 3, scope: !12)
+!24 = !DILocation(line: 13, column: 3, scope: !12)
+!25 = !DILocation(line: 14, column: 1, scope: !12)

Added: llvm/trunk/test/Transforms/SafeStack/X86/debug-loc2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/debug-loc2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/debug-loc2.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/debug-loc2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,96 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+
+; Test llvm.dbg.value for the local variables moved onto the unsafe stack.
+; SafeStack rewrites them relative to the unsafe stack pointer (base address of
+; the unsafe stack frame).
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline safestack uwtable
+define void @f() #0 !dbg !6 {
+entry:
+; CHECK:   %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+  %x1 = alloca i32, align 4
+  %x2 = alloca i32, align 4
+  %0 = bitcast i32* %x1 to i8*, !dbg !13
+  %1 = bitcast i32* %x2 to i8*, !dbg !14
+
+; Unhandled dbg.value: expression does not start with DW_OP_deref
+; CHECK: call void @llvm.dbg.value(metadata ![[EMPTY:.*]], metadata !{{.*}}, metadata !{{.*}})
+  tail call void @llvm.dbg.value(metadata i32* %x1, metadata !10, metadata !23), !dbg !16
+
+; Unhandled dbg.value: expression does not start with DW_OP_deref
+; CHECK: call void @llvm.dbg.value(metadata ![[EMPTY]], metadata !{{.*}}, metadata !{{.*}})
+  tail call void @llvm.dbg.value(metadata i32* %x1, metadata !10, metadata !24), !dbg !16
+
+; Supported dbg.value: rewritten based on the [[USP]] value.
+; CHECK: call void @llvm.dbg.value(metadata i8* %[[USP]], metadata ![[X1:.*]], metadata !DIExpression(DW_OP_deref, DW_OP_constu, 4, DW_OP_minus))
+  tail call void @llvm.dbg.value(metadata i32* %x1, metadata !10, metadata !15), !dbg !16
+  call void @capture(i32* nonnull %x1), !dbg !17
+
+; An extra non-dbg.value metadata use of %x2. Replaced with an empty metadata.
+; CHECK: call void @llvm.random.metadata.use(metadata ![[EMPTY]])
+  call void @llvm.random.metadata.use(metadata i32* %x2)
+
+; CHECK: call void @llvm.dbg.value(metadata i8* %[[USP]], metadata ![[X2:.*]], metadata !DIExpression(DW_OP_deref, DW_OP_constu, 8, DW_OP_minus))
+  call void @llvm.dbg.value(metadata i32* %x2, metadata !12, metadata !15), !dbg !18
+  call void @capture(i32* nonnull %x2), !dbg !19
+  ret void, !dbg !20
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare void @capture(i32*) #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, metadata, metadata) #3
+
+declare void @llvm.random.metadata.use(metadata)
+
+attributes #0 = { noinline safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 271022) (llvm/trunk 271027)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "../llvm/2.cc", directory: "/code/build-llvm")
+
+; CHECK-DAG: ![[EMPTY]] = !{}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.9.0 (trunk 271022) (llvm/trunk 271027)"}
+!6 = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", scope: !1, file: !1, line: 4, type: !7, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !9)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null}
+!9 = !{!10, !12}
+
+; CHECK-DAG: ![[X1]] = !DILocalVariable(name: "x1",
+!10 = !DILocalVariable(name: "x1", scope: !6, file: !1, line: 5, type: !11)
+!11 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+
+; CHECK-DAG: ![[X2]] = !DILocalVariable(name: "x2",
+!12 = !DILocalVariable(name: "x2", scope: !6, file: !1, line: 6, type: !11)
+!13 = !DILocation(line: 5, column: 3, scope: !6)
+!14 = !DILocation(line: 6, column: 3, scope: !6)
+
+!15 = !DIExpression(DW_OP_deref)
+!16 = !DILocation(line: 5, column: 7, scope: !6)
+!17 = !DILocation(line: 8, column: 3, scope: !6)
+!18 = !DILocation(line: 6, column: 7, scope: !6)
+!19 = !DILocation(line: 9, column: 3, scope: !6)
+!20 = !DILocation(line: 10, column: 1, scope: !6)
+!21 = !DILocation(line: 10, column: 1, scope: !22)
+!22 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 1)
+!23 = !DIExpression()
+!24 = !DIExpression(DW_OP_constu, 42, DW_OP_minus)

Added: llvm/trunk/test/Transforms/SafeStack/X86/dynamic-alloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/dynamic-alloca.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/dynamic-alloca.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/dynamic-alloca.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Variable sized alloca
+;  safestack attribute
+; Requires protector.
+define void @foo(i32 %n) nounwind uwtable safestack {
+entry:
+  ; CHECK: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+  %n.addr = alloca i32, align 4
+  %a = alloca i32*, align 8
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
+  %conv = sext i32 %0 to i64
+  %1 = alloca i8, i64 %conv
+  %2 = bitcast i8* %1 to i32*
+  store i32* %2, i32** %a, align 8
+  ; CHECK: store i8* %[[SP:.*]], i8** @__safestack_unsafe_stack_ptr
+  ret void
+}

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-addr-pointer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-addr-pointer.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-addr-pointer.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-addr-pointer.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of a pointer
+;  safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %a = alloca i32*, align 8
+  %b = alloca i32**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  store i32** %a, i32*** %b, align 8
+  %0 = load i32**, i32*** %b, align 8
+  call void @funcall2(i32** %0)
+  ret void
+}
+
+declare void @funcall2(i32**)
+declare i32* @getp()

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-bitcast-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-bitcast-store.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-bitcast-store.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-bitcast-store.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of a local cast to a ptr of a different type
+;   (e.g., int a; ... ; float *b = &a;)
+;  safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %a = alloca i32, align 4
+  %b = alloca float*, align 8
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  store float* %0, float** %b, align 8
+  %1 = load float*, float** %b, align 8
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
+  ret void
+}
+
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-bitcast-store2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-bitcast-store2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-bitcast-store2.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-bitcast-store2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of a local cast to a ptr of a different type (optimized)
+;   (e.g., int a; ... ; float *b = &a;)
+;  safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %a = alloca i32, align 4
+  store i32 0, i32* %a, align 4
+  %0 = bitcast i32* %a to float*
+  call void @funfloat(float* %0) nounwind
+  ret void
+}
+
+declare void @funfloat(float*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-call.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Passing addr-of to function call
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %b = alloca i32, align 4
+  call void @funcall(i32* %b) nounwind
+  ret void
+}
+
+declare void @funcall(i32*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-casted-pointer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-casted-pointer.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-casted-pointer.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-casted-pointer.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of a casted pointer
+;  safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %a = alloca i32*, align 8
+  %b = alloca float**, align 8
+  %call = call i32* @getp()
+  store i32* %call, i32** %a, align 8
+  %0 = bitcast i32** %a to float**
+  store float** %0, float*** %b, align 8
+  %1 = load float**, float*** %b, align 8
+  call void @funfloat2(float** %1)
+  ret void
+}
+
+declare void @funfloat2(float**)
+declare i32* @getp()

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-call.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.pair = type { i32, i32 }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of struct element, GEP followed by callinst.
+;  safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %c = alloca %struct.pair, align 4
+  %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i64 0, i32 1
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y) nounwind
+  ret void
+}
+
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-invoke.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-invoke.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-invoke.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-invoke.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.pair = type { i32, i32 }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of a struct element passed into an invoke instruction.
+;   (GEP followed by an invoke)
+;  safestack attribute
+; Requires protector.
+define i32 @foo() uwtable safestack personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %c = alloca %struct.pair, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %a = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
+  store i32 0, i32* %a, align 4
+  %a1 = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
+  invoke void @_Z3exceptPi(i32* %a1)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 }
+          catch i8* null
+  ret i32 0
+}
+
+declare void @_Z3exceptPi(i32*)
+declare i32 @__gxx_personality_v0(...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-negative.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-negative.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-negative.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-negative.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of a local, optimized into a GEP (e.g., &a - 12)
+;  safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %a = alloca i32, align 4
+  %add.ptr5 = getelementptr inbounds i32, i32* %a, i64 -12
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5) nounwind
+  ret void
+}
+
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-ptrtoint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-ptrtoint.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-ptrtoint.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-ptrtoint.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.pair = type { i32, i32 }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of struct element, GEP followed by ptrtoint.
+;  safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
+  %0 = ptrtoint i32* %y to i64
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
+  ret void
+}
+
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-store.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-store.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-gep-store.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.pair = type { i32, i32 }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of struct element. (GEP followed by store).
+;  safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %c = alloca %struct.pair, align 4
+  %b = alloca i32*, align 8
+  %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
+  store i32* %y, i32** %b, align 8
+  %0 = load i32*, i32** %b, align 8
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
+  ret void
+}
+
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-phi-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-phi-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-phi-call.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-phi-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of in phi instruction
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp = fcmp ogt double %call, 3.140000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call1 = call double @testi_aux() nounwind
+  store double %call1, double* %x, align 8
+  br label %if.end4
+
+if.else:                                          ; preds = %entry
+  %cmp2 = fcmp ogt double %call, 1.000000e+00
+  br i1 %cmp2, label %if.then3, label %if.end4
+
+if.then3:                                         ; preds = %if.else
+  br label %if.end4
+
+if.end4:                                          ; preds = %if.else, %if.then3, %if.then
+  %y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
+  %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0) nounwind
+  ret void
+}
+
+declare double @testi_aux()
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-select-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-select-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-select-call.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-select-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of in select instruction
+; safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %x = alloca double, align 8
+  %call = call double @testi_aux() nounwind
+  store double %call, double* %x, align 8
+  %cmp2 = fcmp ogt double %call, 0.000000e+00
+  %y.1 = select i1 %cmp2, double* %x, double* null
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+  ret void
+}
+
+declare double @testi_aux()
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/escape-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/escape-vector.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/escape-vector.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/escape-vector.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.vec = type { <4 x i32> }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of a vector nested in a struct
+;  safestack attribute
+; Requires protector.
+define void @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %c = alloca %struct.vec, align 16
+  %y = getelementptr inbounds %struct.vec, %struct.vec* %c, i64 0, i32 0
+  %add.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 -12
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr) nounwind
+  ret void
+}
+
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/invoke.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/invoke.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/invoke.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/invoke.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,33 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Addr-of a variable passed into an invoke instruction.
+;  safestack attribute
+; Requires protector and stack restore after landing pad.
+define i32 @foo() uwtable safestack personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  ; CHECK: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+  ; CHECK: %[[STATICTOP:.*]] = getelementptr i8, i8* %[[SP]], i32 -16
+  %a = alloca i32, align 4
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  store i32 0, i32* %a, align 4
+  invoke void @_Z3exceptPi(i32* %a)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 0
+
+lpad:
+  ; CHECK: landingpad
+  ; CHECK-NEXT: catch
+  %0 = landingpad { i8*, i32 }
+          catch i8* null
+  ; CHECK-NEXT: store i8* %[[STATICTOP]], i8** @__safestack_unsafe_stack_ptr
+  ret i32 0
+}
+
+declare void @_Z3exceptPi(i32*)
+declare i32 @__gxx_personality_v0(...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/layout-frag.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/layout-frag.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/layout-frag.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/layout-frag.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; Test that safestack layout reuses a region w/o fragmentation.
+; RUN: opt -safe-stack -safe-stack-coloring=1 -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+define void @f() safestack {
+; CHECK-LABEL: define void @f
+entry:
+; CHECK:  %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -16
+
+  %x0 = alloca i64, align 8
+  %x1 = alloca i8, align 1
+  %x2 = alloca i64, align 8
+
+  %x0a = bitcast i64* %x0 to i8*
+  %x2a = bitcast i64* %x2 to i8*
+
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %x0a)
+  call void @capture64(i64* %x0)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %x0a)
+
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %x1)
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %x2a)
+  call void @capture8(i8* %x1)
+  call void @capture64(i64* %x2)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %x1)
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %x2a)
+
+; Test that i64 allocas share space.
+; CHECK: getelementptr i8, i8* %unsafe_stack_ptr, i32 -8
+; CHECK: getelementptr i8, i8* %unsafe_stack_ptr, i32 -9
+; CHECK: getelementptr i8, i8* %unsafe_stack_ptr, i32 -8
+
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @capture8(i8*)
+declare void @capture64(i64*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/layout-region-split.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/layout-region-split.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/layout-region-split.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/layout-region-split.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,84 @@
+; Regression test for safestack layout. Used to fail with asan.
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+define void @f() safestack {
+; CHECK-LABEL: define void @f
+entry:
+; CHECK:  %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK:   getelementptr i8, i8* %[[USP]], i32 -224
+
+  %x0 = alloca i8, align 16
+  %x1 = alloca i8, align 16
+  %x2 = alloca i8, align 16
+  %x3 = alloca i8, align 16
+  %x4 = alloca i8, align 16
+  %x5 = alloca i8, align 16
+  %x6 = alloca i8, align 16
+  %x7 = alloca i8, align 16
+  %x8 = alloca i8, align 16
+  %x9 = alloca i8, align 16
+  %x10 = alloca i8, align 16
+  %x11 = alloca i8, align 16
+  %x12 = alloca i8, align 16
+  %x13 = alloca i8, align 16
+  %y0 = alloca i8, align 2
+  %y1 = alloca i8, align 2
+  %y2 = alloca i8, align 2
+  %y3 = alloca i8, align 2
+  %y4 = alloca i8, align 2
+  %y5 = alloca i8, align 2
+  %y6 = alloca i8, align 2
+  %y7 = alloca i8, align 2
+  %y8 = alloca i8, align 2
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -16
+  call void @capture8(i8* %x0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -32
+  call void @capture8(i8* %x1)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -48
+  call void @capture8(i8* %x2)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -64
+  call void @capture8(i8* %x3)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -80
+  call void @capture8(i8* %x4)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -96
+  call void @capture8(i8* %x5)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -112
+  call void @capture8(i8* %x6)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -128
+  call void @capture8(i8* %x7)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -144
+  call void @capture8(i8* %x8)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -160
+  call void @capture8(i8* %x9)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -176
+  call void @capture8(i8* %x10)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -192
+  call void @capture8(i8* %x11)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -208
+  call void @capture8(i8* %x12)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -224
+  call void @capture8(i8* %x13)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -2
+  call void @capture8(i8* %y0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+  call void @capture8(i8* %y1)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -6
+  call void @capture8(i8* %y2)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -8
+  call void @capture8(i8* %y3)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -10
+  call void @capture8(i8* %y4)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -12
+  call void @capture8(i8* %y5)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -14
+  call void @capture8(i8* %y6)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -18
+  call void @capture8(i8* %y7)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -20
+  call void @capture8(i8* %y8)
+
+  ret void
+}
+
+declare void @capture8(i8*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/SafeStack/X86/memintrinsic-oob-read.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/memintrinsic-oob-read.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/memintrinsic-oob-read.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/memintrinsic-oob-read.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,14 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+
+; CHECK: __safestack_unsafe_stack_ptr
+define void @oob_read(i8* %ptr) safestack {
+  %1 = alloca i8
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %ptr, i8* align 1 %1, i64 4, i1 false)
+  ret void
+}

Added: llvm/trunk/test/Transforms/SafeStack/X86/no-attr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/no-attr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/no-attr.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/no-attr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; no safestack attribute
+; Requires no protector.
+
+; CHECK-NOT: __safestack_unsafe_stack_ptr
+
+; CHECK: @foo
+define void @foo(i8* %a) nounwind uwtable {  ; attribute list has no safestack
+entry:
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  %a.addr = alloca i8*, align 8  ; slot that holds the incoming pointer %a
+  %buf = alloca [16 x i8], align 16  ; 16-byte buffer whose address goes to strcpy and printf below
+  store i8* %a, i8** %a.addr, align 8
+  %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+  %0 = load i8*, i8** %a.addr, align 8
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)  ; destination is %buf, source is the reloaded %a
+  %arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+  ret void
+}
+
+declare i8* @strcpy(i8*, i8*)
+declare i32 @printf(i8*, ...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/phi-cycle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/phi-cycle.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/phi-cycle.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/phi-cycle.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.small = type { i8 }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Address-of a structure taken in a function with a loop where
+; the alloca is an incoming value to a PHI node and a use of that PHI
+; node is also an incoming value.
+; Verify that the address-of analysis does not get stuck in infinite
+; recursion when chasing the alloca through the PHI nodes.
+; Requires protector.
+define i32 @foo(i32 %arg) nounwind uwtable safestack {
+bb:
+  ; CHECK: __safestack_unsafe_stack_ptr
+  %tmp = alloca %struct.small*, align 8  ; pointer-sized slot; its address is passed to @dummy on the next line
+  %tmp1 = call i32 (...) @dummy(%struct.small** %tmp) nounwind
+  %tmp2 = load %struct.small*, %struct.small** %tmp, align 8
+  %tmp3 = ptrtoint %struct.small* %tmp2 to i64
+  %tmp4 = trunc i64 %tmp3 to i32  ; low 32 bits of the loaded pointer, used as the loop bound
+  %tmp5 = icmp sgt i32 %tmp4, 0
+  br i1 %tmp5, label %bb6, label %bb21
+
+bb6:                                              ; preds = %bb17, %bb
+  %tmp7 = phi %struct.small* [ %tmp19, %bb17 ], [ %tmp2, %bb ]  ; both incoming values are loads reached through %tmp
+  %tmp8 = phi i64 [ %tmp20, %bb17 ], [ 1, %bb ]  ; loop counter, starts at 1
+  %tmp9 = phi i32 [ %tmp14, %bb17 ], [ %tmp1, %bb ]
+  %tmp10 = getelementptr inbounds %struct.small, %struct.small* %tmp7, i64 0, i32 0
+  %tmp11 = load i8, i8* %tmp10, align 1
+  %tmp12 = icmp eq i8 %tmp11, 1
+  %tmp13 = add nsw i32 %tmp9, 8
+  %tmp14 = select i1 %tmp12, i32 %tmp13, i32 %tmp9  ; add 8 to the running value only when the loaded byte is 1
+  %tmp15 = trunc i64 %tmp8 to i32
+  %tmp16 = icmp eq i32 %tmp15, %tmp4
+  br i1 %tmp16, label %bb21, label %bb17
+
+bb17:                                             ; preds = %bb6
+  %tmp18 = getelementptr inbounds %struct.small*, %struct.small** %tmp, i64 %tmp8  ; indexes the alloca by the loop counter
+  %tmp19 = load %struct.small*, %struct.small** %tmp18, align 8  ; feeds the %tmp7 phi on the back edge
+  %tmp20 = add i64 %tmp8, 1
+  br label %bb6
+
+bb21:                                             ; preds = %bb6, %bb
+  %tmp22 = phi i32 [ %tmp1, %bb ], [ %tmp14, %bb6 ]
+  %tmp23 = call i32 (...) @dummy(i32 %tmp22) nounwind
+  ret i32 undef
+}
+
+declare i32 @dummy(...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/phi.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+define void @f(i1 %d1, i1 %d2) safestack {
+entry:
+; CHECK-LABEL: define void @f(
+; CHECK:         %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT:    getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK:         br i1 %d1, label %[[BB0:.*]], label %[[BB1:.*]]
+  %a = alloca i32, align 8  ; reaches @capture through the phi %c (from %bb0)
+  %b = alloca i32, align 8  ; reaches @capture through the phi %c (from %bb1)
+  br i1 %d1, label %bb0, label %bb1
+
+bb0:
+; CHECK: [[BB0]]:
+; CHECK: %[[Ai8:.*]] = getelementptr i8, i8* %[[USP]], i32
+; CHECK: %[[AUNSAFE:.*]] = bitcast i8* %[[Ai8]] to i32*
+; CHECK: br i1
+  br i1 %d2, label %bb2, label %bb2  ; both successors are %bb2, so %bb2 has two incoming edges from %bb0
+
+bb1:
+; CHECK: [[BB1]]:
+; CHECK: %[[Bi8:.*]] = getelementptr i8, i8* %[[USP]], i32
+; CHECK: %[[BUNSAFE:.*]] = bitcast i8* %[[Bi8]] to i32*
+; CHECK: br label
+  br label %bb2
+
+bb2:
+; CHECK: phi i32* [ %[[AUNSAFE]], %[[BB0]] ], [ %[[AUNSAFE]], %[[BB0]] ], [ %[[BUNSAFE]], %[[BB1]] ]
+  %c = phi i32* [ %a, %bb0 ], [ %a, %bb0 ], [ %b, %bb1 ]  ; %bb0 is listed twice, once per edge
+  call void @capture(i32* %c)
+  ret void
+}
+
+declare void @capture(i32*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/ret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/ret.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/ret.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/ret.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,17 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Returns an alloca address.
+; Requires protector.
+
+define i64 @foo() nounwind readnone safestack {
+entry:
+  ; CHECK-LABEL: define i64 @foo(
+  ; CHECK: __safestack_unsafe_stack_ptr
+  ; CHECK: ret i64
+  %x = alloca [100 x i32], align 16  ; local array of 100 i32 elements
+  %0 = ptrtoint [100 x i32]* %x to i64  ; the array's address as an integer
+  ret i64 %0  ; the local's address leaves the function as the return value
+}

Added: llvm/trunk/test/Transforms/SafeStack/X86/setjmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/setjmp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/setjmp.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/setjmp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t }
+%struct.__sigset_t = type { [16 x i64] }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+@buf = internal global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16
+
+; setjmp/longjmp test.
+; Requires protector.
+define i32 @foo() nounwind uwtable safestack {
+entry:
+  ; CHECK: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+  ; CHECK: %[[STATICTOP:.*]] = getelementptr i8, i8* %[[SP]], i32 -16
+  %retval = alloca i32, align 4  ; only ever stored to directly in this function
+  %x = alloca i32, align 4  ; its address is passed to @funcall in if.then
+  store i32 0, i32* %retval
+  store i32 42, i32* %x, align 4
+  %call = call i32 @_setjmp(%struct.__jmp_buf_tag* getelementptr inbounds ([1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* @buf, i32 0, i32 0)) returns_twice
+  ; CHECK: setjmp
+  ; CHECK-NEXT: store i8* %[[STATICTOP]], i8** @__safestack_unsafe_stack_ptr
+  %tobool = icmp ne i32 %call, 0  ; nonzero result of the returns_twice call selects if.else
+  br i1 %tobool, label %if.else, label %if.then
+if.then:                                          ; preds = %entry
+  call void @funcall(i32* %x)
+  br label %if.end
+if.else:                                          ; preds = %entry
+  call i32 (...) @dummy()
+  br label %if.end
+if.end:                                           ; preds = %if.else, %if.then
+  ret i32 0
+}
+
+declare i32 @_setjmp(%struct.__jmp_buf_tag*)
+declare void @funcall(i32*)
+declare i32 @dummy(...)

Added: llvm/trunk/test/Transforms/SafeStack/X86/setjmp2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/setjmp2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/setjmp2.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/setjmp2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,43 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t }
+%struct.__sigset_t = type { [16 x i64] }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+@buf = internal global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16
+
+; setjmp/longjmp test with dynamically sized array.
+; Requires protector.
+; CHECK: @foo(i32 %[[ARG:.*]])
+define i32 @foo(i32 %size) nounwind uwtable safestack {
+entry:
+  ; CHECK: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+  ; CHECK-NEXT: %[[DYNPTR:.*]] = alloca i8*
+  ; CHECK-NEXT: store i8* %[[SP]], i8** %[[DYNPTR]]
+
+  ; CHECK-NEXT: %[[ZEXT:.*]] = zext i32 %[[ARG]] to i64
+  ; CHECK-NEXT: %[[MUL:.*]] = mul i64 %[[ZEXT]], 4
+  ; CHECK-NEXT: %[[SP2:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+  ; CHECK-NEXT: %[[PTRTOINT:.*]] = ptrtoint i8* %[[SP2]] to i64
+  ; CHECK-NEXT: %[[SUB:.*]] = sub i64 %[[PTRTOINT]], %[[MUL]]
+  ; CHECK-NEXT: %[[AND:.*]] = and i64 %[[SUB]], -16
+  ; CHECK-NEXT: %[[INTTOPTR:.*]] = inttoptr i64 %[[AND]] to i8*
+  ; CHECK-NEXT: store i8* %[[INTTOPTR]], i8** @__safestack_unsafe_stack_ptr
+  ; CHECK-NEXT: store i8* %[[INTTOPTR]], i8** %[[DYNPTR]]
+  ; CHECK-NEXT: %[[ALLOCA:.*]] = bitcast i8* %[[INTTOPTR]] to i32*
+  %a = alloca i32, i32 %size  ; dynamically sized: %size elements of i32
+
+  ; CHECK: setjmp
+  ; CHECK-NEXT: %[[LOAD:.*]] = load i8*, i8** %[[DYNPTR]]
+  ; CHECK-NEXT: store i8* %[[LOAD]], i8** @__safestack_unsafe_stack_ptr
+  %call = call i32 @_setjmp(%struct.__jmp_buf_tag* getelementptr inbounds ([1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* @buf, i32 0, i32 0)) returns_twice
+
+  ; CHECK: call void @funcall(i32* %[[ALLOCA]])
+  call void @funcall(i32* %a)  ; the dynamic alloca's address is passed to an external call
+  ; CHECK-NEXT: store i8* %[[SP]], i8** @__safestack_unsafe_stack_ptr
+  ret i32 0  ; the directive just above requires the entry-time pointer (SP) to be stored back before returning
+}
+
+declare i32 @_setjmp(%struct.__jmp_buf_tag*)
+declare void @funcall(i32*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/sink-to-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/sink-to-use.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/sink-to-use.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/sink-to-use.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; Test that unsafe alloca address calculation is done immediately before each use.
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+
+define void @f() safestack {
+entry:
+  %x0 = alloca i32, align 4  ; passed to the first @use call
+  %x1 = alloca i32, align 4  ; passed to the second @use call
+
+; CHECK: %[[A:.*]] = getelementptr i8, i8* %{{.*}}, i32 -4
+; CHECK: %[[X0:.*]] = bitcast i8* %[[A]] to i32*
+; CHECK: call void @use(i32* %[[X0]])
+  call void @use(i32* %x0)
+
+; CHECK: %[[B:.*]] = getelementptr i8, i8* %{{.*}}, i32 -8
+; CHECK: %[[X1:.*]] = bitcast i8* %[[B]] to i32*
+; CHECK: call void @use(i32* %[[X1]])
+  call void @use(i32* %x1)
+  ret void
+}
+
+declare void @use(i32*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/ssp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/ssp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/ssp.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/ssp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt -safe-stack -S -mtriple=x86_64-unknown < %s -o - | FileCheck %s
+
+define void @foo() safestack sspreq {
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
+
+; CHECK: %[[A:.*]] = getelementptr i8, i8* %[[USP]], i32 -8
+; CHECK: %[[StackGuardSlot:.*]] = bitcast i8* %[[A]] to i8**
+; CHECK: %[[StackGuard:.*]] = load i8*, i8** @__stack_chk_guard
+; CHECK: store i8* %[[StackGuard]], i8** %[[StackGuardSlot]]
+  %a = alloca i8, align 1  ; one-byte local whose address is passed to @Capture
+
+; CHECK: call void @Capture
+  call void @Capture(i8* %a)
+
+; CHECK: %[[B:.*]] = load i8*, i8** %[[StackGuardSlot]]
+; CHECK: %[[COND:.*]] = icmp ne i8* %[[StackGuard]], %[[B]]
+; CHECK: br i1 %[[COND]], {{.*}} !prof
+
+; CHECK:      call void @__stack_chk_fail()
+; CHECK-NEXT: unreachable
+
+; CHECK:      store i8* %[[USP]], i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: ret void
+  ret void  ; the directives above expect a guard comparison and a pointer restore ahead of this return
+}
+
+declare void @Capture(i8*)

Added: llvm/trunk/test/Transforms/SafeStack/X86/store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/store.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/store.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/store.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,63 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+define void @bad_store() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: @bad_store(
+  ; CHECK: __safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %a = alloca i32, align 4  ; 4-byte slot
+  %0 = ptrtoint i32* %a to i64  ; the slot's address goes through an integer...
+  %1 = inttoptr i64 %0 to i64*  ; ...and comes back typed as i64*
+  store i64 zeroinitializer, i64* %1  ; 8-byte store through a pointer to the 4-byte slot
+  ret void
+}
+
+define void @good_store() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: @good_store(
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %a = alloca i32, align 4  ; 4-byte slot
+  %0 = bitcast i32* %a to i8*
+  store i8 zeroinitializer, i8* %0  ; 1-byte store at offset 0, inside the slot
+  ret void
+}
+
+define void @overflow_gep_store() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: @overflow_gep_store(
+  ; CHECK: __safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %a = alloca i32, align 4  ; 4-byte slot, valid byte offsets 0..3
+  %0 = bitcast i32* %a to i8*
+  %1 = getelementptr i8, i8* %0, i32 4  ; byte offset 4: one past the slot's last byte
+  store i8 zeroinitializer, i8* %1
+  ret void
+}
+
+define void @underflow_gep_store() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: @underflow_gep_store(
+  ; CHECK: __safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %a = alloca i32, align 4  ; 4-byte slot, valid byte offsets 0..3
+  %0 = bitcast i32* %a to i8*
+  %1 = getelementptr i8, i8* %0, i32 -1  ; byte offset -1: one byte before the slot
+  store i8 zeroinitializer, i8* %1
+  ret void
+}
+
+define void @good_gep_store() nounwind uwtable safestack {
+entry:
+  ; CHECK-LABEL: @good_gep_store(
+  ; CHECK-NOT: __safestack_unsafe_stack_ptr
+  ; CHECK: ret void
+  %a = alloca i32, align 4  ; 4-byte slot, valid byte offsets 0..3
+  %0 = bitcast i32* %a to i8*
+  %1 = getelementptr i8, i8* %0, i32 3  ; byte offset 3: the slot's last byte
+  store i8 zeroinitializer, i8* %1
+  ret void
+}

Added: llvm/trunk/test/Transforms/SafeStack/X86/struct.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SafeStack/X86/struct.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SafeStack/X86/struct.ll (added)
+++ llvm/trunk/test/Transforms/SafeStack/X86/struct.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+%struct.foo = type { [16 x i8] }
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; struct { [16 x i8] }
+
+define void @foo(i8* %a) nounwind uwtable safestack {
+entry:
+  ; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+
+  ; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+
+  ; CHECK: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
+
+  %a.addr = alloca i8*, align 8  ; slot for the incoming pointer; only stored to and loaded from directly
+  %buf = alloca %struct.foo, align 1  ; %struct.foo wraps a [16 x i8]; its first byte's address goes to strcpy below
+
+  ; CHECK: %[[AADDR:.*]] = alloca i8*, align 8
+  ; CHECK: store i8* {{.*}}, i8** %[[AADDR]], align 8
+  store i8* %a, i8** %a.addr, align 8
+
+  ; CHECK: %[[BUFPTR:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+  ; CHECK: %[[BUFPTR2:.*]] = bitcast i8* %[[BUFPTR]] to %struct.foo*
+  ; CHECK: %[[GEP:.*]] = getelementptr inbounds %struct.foo, %struct.foo* %[[BUFPTR2]], i32 0, i32 0, i32 0
+  %gep = getelementptr inbounds %struct.foo, %struct.foo* %buf, i32 0, i32 0, i32 0  ; address of the first array element inside the struct
+
+  ; CHECK: %[[A:.*]] = load i8*, i8** %[[AADDR]], align 8
+  %a2 = load i8*, i8** %a.addr, align 8
+
+  ; CHECK: call i8* @strcpy(i8* %[[GEP]], i8* %[[A]])
+  %call = call i8* @strcpy(i8* %gep, i8* %a2)  ; destination is the struct's buffer, source is the reloaded %a
+
+  ; CHECK: store i8* %[[USP]], i8** @__safestack_unsafe_stack_ptr
+  ret void
+}
+
+declare i8* @strcpy(i8*, i8*)

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+empty:100:0
+ 1.-3: 10

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+3empty:100:BAD
+ 0: 0
+ 1: 100

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_line_values.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_line_values.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_line_values.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_line_values.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+empty:100:0
+-1: 10

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_mangle.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_mangle.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_mangle.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_mangle.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+double convert<std::string, float>(float):2909472:181842
+ 0: 181842
+ 1: 181842

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+empty:100:0
+ 0: 0
+ 1: BAD

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_samples.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_samples.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_samples.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/bad_samples.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+empty:100:0
+ 1.3: -10

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/branch.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/branch.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/branch.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/branch.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,10 @@
+main:15680:2500
+ 1: 2500
+ 4: 1000
+ 5: 1000
+ 6: 800
+ 7: 500
+ 9: 10226
+ 10: 2243
+ 16: 0
+ 18: 0

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/calls.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/calls.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/calls.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/calls.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,10 @@
+_Z3sumii:105580:5279
+ 0: 5279
+ 1: 5279
+ 2: 5279
+main:225715:0
+ 2.1: 5553
+ 3: 5391
+ # This indicates that at line 3 of this function, the 'then' branch
+ # of the conditional is taken (discriminator '1').
+ 3.1: 5752 _Z3sumii:5860

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/cold-indirect-call.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/cold-indirect-call.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/cold-indirect-call.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/cold-indirect-call.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,6 @@
+foo:5000:1
+ 1: 2000 quz:1000
+ 1: bar:3000
+   1: 3000
+ 1: baz:0
+   1: 0

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,10 @@
+main:20111403:0
+ 2.1: 404065
+ 3: 443089
+ 3.1: 0
+ 4: 404066
+ 6: 0
+ 7: 0
+ 3.1: _Z12never_calledi:0
+  0: 0
+  1: 0

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/coverage-warning.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/coverage-warning.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/coverage-warning.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/coverage-warning.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,5 @@
+foo:30000:100
+ 2: 28000
+ 3: 1000
+# This profile is stale. Function foo() does not have a line 8 anymore.
+ 8: 1700

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/discriminator.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/discriminator.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/discriminator.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/discriminator.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,8 @@
+foo:1000:0
+ 1: 1
+ 2: 1
+ 2.1: 100
+ 3: 100
+ 3.1: 5
+ 4: 100
+ 5: 1

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/einline.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/einline.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/einline.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/einline.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,7 @@
+_Z3foov:200:100
+ 1: _Z3barv:0
+ 2: no_inline:100
+ 3: _Z3barv:100
+recursive:200:100
+ 1: recursive:100
+ 2: recursive:100

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/entry_counts.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/entry_counts.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/entry_counts.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/entry_counts.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+empty:100:13293
+ 0: 0
+ 1: 100

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/entry_counts_cold.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/entry_counts_cold.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/entry_counts_cold.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/entry_counts_cold.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+top:200:100
+ 1: 100 foo:100
+ 2: 100
+ 3: 2
+ 4: 100
+ 1: foo:100
+    2: 100
+    3: 100 bar:100
+    4: 100
+ 3: bar:2
+    1: 2
+    2: 2
+foo:200:150
+ 2: 150
+ 3: 150  bar:150
+ 4: 150
+bar:450:300
+ 1: 300 baz:300
+ 2: 300
+ 3: 300

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/flattened.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/flattened.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/flattened.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/flattened.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+foo:100:100
+ 1: 100

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/fnptr.binprof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/fnptr.binprof?rev=358552&view=auto
==============================================================================
Binary file - no diff available.

Propchange: llvm/trunk/test/Transforms/SampleProfile/Inputs/fnptr.binprof
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/fnptr.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/fnptr.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/fnptr.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/fnptr.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,13 @@
+_Z3fooi:7711:610
+ 1: 610
+_Z3bari:20301:1437
+ 1: 1437
+main:184019:0
+ 3: 0
+ 4: 534
+ 6: 2080
+ 9: 2064 _Z3bari:1471 _Z3fooi:631
+ 5.1: 1075
+ 5: 1075
+ 7: 534
+ 4.2: 534

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/function_metadata.compact.afdo
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/function_metadata.compact.afdo?rev=358552&view=auto
==============================================================================
Binary file - no diff available.

Propchange: llvm/trunk/test/Transforms/SampleProfile/Inputs/function_metadata.compact.afdo
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/function_metadata.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/function_metadata.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/function_metadata.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/function_metadata.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,17 @@
+test:3200:0
+ 1: 100
+ 2: 100
+ 3: foo:1000
+  1: 800
+  3: bar:200
+   2: 190
+   4: baz:10
+    2: 10
+ 4: foo1:1000
+  1: 1000
+ 4: foo2:1000
+  1: 1000 foo3:1000
+test_liveness:1000:0
+ 1: foo:1000
+  1: foo_available:1000
+   2: 1000

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo?rev=358552&view=auto
==============================================================================
Binary file - no diff available.

Propchange: llvm/trunk/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.afdo
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.afdo?rev=358552&view=auto
==============================================================================
Binary file - no diff available.

Propchange: llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.afdo
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.compact.afdo
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.compact.afdo?rev=358552&view=auto
==============================================================================
Binary file - no diff available.

Propchange: llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.compact.afdo
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/indirect-call.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+test:63067:0
+ 1: 3345 _Z3barv:1398 _Z3foov:2059
+test_inline:3000:0
+ 1: 1000 foo_inline3:1000
+ 1: foo_inline1:3000
+  11: 3000
+ 1: foo_inline2:4000
+  19: 4000
+test_noinline:3000:0
+ 1: foo_noinline:3000
+  20: 3000
+test_direct:3000:0
+ 1: foo_direct:3000
+  21: 3000
+test_inline_strip:3000:0
+ 1: foo_inline_strip:3000
+  1: 3000
+test_inline_strip_conflict:3000:0
+ 1: foo_inline_strip_conflict:3000
+  1: 3000
+test_norecursive_inline:3000:0
+ 1: test_norecursive_inline:3000
+  20: 3000
+test_noinline_bitcast:3000:0
+ 1: foo_direct_i32:3000
+  1: 3000
+return_arg_caller:3000:0
+ 1: foo_inline1:3000
+  11: 3000
+ 2: return_arg:3000
+  1: 3000

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-act.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-act.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-act.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-act.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+_Z3bari:100:0
+ 1: _Z3fooi:100
+  2: 100

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-combine.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-combine.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-combine.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-combine.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+foo:1000:1000
+ 1: bar:1000

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-coverage.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-coverage.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-coverage.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-coverage.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,7 @@
+main:501438:0
+ 2.1: 23478
+ 3: 23478
+ 4: 0
+ 0: 0
+ 3: _Z3fool:172746
+  1: 31878 rand:31878

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-hint.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-hint.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-hint.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/inline-hint.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+_Z6hot_fnRxi:700:0
+_Z7cold_fnRxi:1:0
+other:299:0

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/inline.compactbinary.afdo
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/inline.compactbinary.afdo?rev=358552&view=auto
==============================================================================
Binary file - no diff available.

Propchange: llvm/trunk/test/Transforms/SampleProfile/Inputs/inline.compactbinary.afdo
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/inline.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/inline.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/inline.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/inline.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,7 @@
+main:225715:0
+ 2.1: 5553
+ 3: 5391
+ 3.1: _Z3sumii:5860
+  0: 5279
+  1: 5279
+  2: 5279

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/nodebug.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/nodebug.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/nodebug.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/nodebug.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,2 @@
+foo:100:10
+ 0: bar:10

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/nolocinfo.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/nolocinfo.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/nolocinfo.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/nolocinfo.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+foo:30000:100
+ 2: 28000
+ 3: 1000

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/offset.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/offset.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/offset.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/offset.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,4 @@
+_Z3fooi:300:1
+ 65532: 1000
+ 65533: 10
+ 65535: 990

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/propagate.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/propagate.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/propagate.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/propagate.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+_Z3fooiil:33168:0
+ 0: 0
+ 1: 0
+ 2: 0
+ 4: 0
+ 4.1: 302
+ 4.2: 315
+ 5: 302
+ 6: 200
+ 7: 308
+ 8: 227
+ 9: 227
+ 10: 227
+ 11: 83
+ 11.1: 7553
+ 11.2: 7479
+ 12: 7479
+ 13: 7479
+ 16: 305
+ 18: 0
+ 19: 0
+ 65533: 308

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/remap.map
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/remap.map?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/remap.map (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/remap.map Tue Apr 16 21:52:47 2019
@@ -0,0 +1,8 @@
+# foo:: and foo::detail:: are equivalent
+name 3foo N3foo6detailE
+
+# foo::qux and foo::quux are equivalent
+type N3foo3quxE N3foo4quuxE
+
+# N::X and M::X are equivalent
+name N1N1XE N1M1XE

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/remap.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/remap.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/remap.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/remap.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,10 @@
+_ZN3foo3barERKN1N1XINS_4quuxEEE:15680:2500
+ 1: 2500
+ 4: 1000
+ 5: 1000
+ 6: 800
+ 7: 500
+ 9: 10226
+ 10: 2243
+ 16: 0
+ 18: 0

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/remarks.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/remarks.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/remarks.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/remarks.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,7 @@
+main:623868:0
+ 0: 0
+ 0: _Z3foov:623868
+  3: 18346
+  4: 0
+  6: 19475
+  2: 18305

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/summary.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/summary.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/summary.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/summary.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,9 @@
+bar:100:3
+ 1: 100
+foo:200:1
+ 1: 200
+baz:600:1
+ 1: 0
+ 2: 300
+ 1: bar:300
+  1: 300

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/syntax.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/syntax.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/syntax.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/syntax.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+empty:100:0
+ 0: 0
+ 1: 100

Added: llvm/trunk/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof (added)
+++ llvm/trunk/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+main:2257150:0
+ 2.1: 5553
+ 3: 5391
+ 3.1: foo:5860
+  0: 5279
+  1: 5279
+  2: 5279
+ 4.1: goo:60
+  0: 20
+  1: 20
+  2: 20

Added: llvm/trunk/test/Transforms/SampleProfile/branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/branch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/branch.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/branch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,242 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ code for this test case:
+;
+; #include <stdio.h>
+; #include <stdlib.h>
+
+; int main(int argc, char *argv[]) {
+;   if (argc < 2)
+;     return 1;
+;   double result;
+;   int limit = atoi(argv[1]);
+;   if (limit > 100) {
+;     double s = 23.041968 * atoi(argv[2]);
+;     for (int u = 0; u < limit; u++) {
+;       double x = s;
+;       s = x + 3.049 + (double)u;
+;       s -= s + 3.94 / x * 0.32;
+;     }
+;     result = s;
+;   } else {
+;     result = atoi(argv[2]);
+;   }
+;   printf("result is %lf\n", result);
+;   return 0;
+; }
+
+@.str = private unnamed_addr constant [15 x i8] c"result is %lf\0A\00", align 1
+
+; Function Attrs: uwtable
+define i32 @main(i32 %argc, i8** %argv) #0 !dbg !6 {
+; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main':
+
+entry:
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca i8**, align 8
+  %result = alloca double, align 8
+  %limit = alloca i32, align 4
+  %s = alloca double, align 8
+  %u = alloca i32, align 4
+  %x = alloca double, align 8
+  store i32 0, i32* %retval, align 4
+  store i32 %argc, i32* %argc.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !16, metadata !17), !dbg !18
+  store i8** %argv, i8*** %argv.addr, align 8
+  call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !19, metadata !17), !dbg !20
+  %0 = load i32, i32* %argc.addr, align 4, !dbg !21
+  %cmp = icmp slt i32 %0, 2, !dbg !23
+  br i1 %cmp, label %if.then, label %if.end, !dbg !24
+; CHECK:  edge entry -> if.then probability is 0x4ccf6b16 / 0x80000000 = 60.01%
+; CHECK:  edge entry -> if.end probability is 0x333094ea / 0x80000000 = 39.99%
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* %retval, align 4, !dbg !25
+  br label %return, !dbg !25
+
+if.end:                                           ; preds = %entry
+  call void @llvm.dbg.declare(metadata double* %result, metadata !26, metadata !17), !dbg !27
+  call void @llvm.dbg.declare(metadata i32* %limit, metadata !28, metadata !17), !dbg !29
+  %1 = load i8**, i8*** %argv.addr, align 8, !dbg !30
+  %arrayidx = getelementptr inbounds i8*, i8** %1, i64 1, !dbg !30
+  %2 = load i8*, i8** %arrayidx, align 8, !dbg !30
+  %call = call i32 @atoi(i8* %2) #4, !dbg !31
+  store i32 %call, i32* %limit, align 4, !dbg !29
+  %3 = load i32, i32* %limit, align 4, !dbg !32
+  %cmp1 = icmp sgt i32 %3, 100, !dbg !34
+  br i1 %cmp1, label %if.then.2, label %if.else, !dbg !35
+; CHECK: edge if.end -> if.then.2 probability is 0x6652c748 / 0x80000000 = 79.94%
+; CHECK: edge if.end -> if.else probability is 0x19ad38b8 / 0x80000000 = 20.06%
+
+if.then.2:                                        ; preds = %if.end
+  call void @llvm.dbg.declare(metadata double* %s, metadata !36, metadata !17), !dbg !38
+  %4 = load i8**, i8*** %argv.addr, align 8, !dbg !39
+  %arrayidx3 = getelementptr inbounds i8*, i8** %4, i64 2, !dbg !39
+  %5 = load i8*, i8** %arrayidx3, align 8, !dbg !39
+  %call4 = call i32 @atoi(i8* %5) #4, !dbg !40
+  %conv = sitofp i32 %call4 to double, !dbg !40
+  %mul = fmul double 0x40370ABE6A337A81, %conv, !dbg !41
+  store double %mul, double* %s, align 8, !dbg !38
+  call void @llvm.dbg.declare(metadata i32* %u, metadata !42, metadata !17), !dbg !44
+  store i32 0, i32* %u, align 4, !dbg !44
+  br label %for.cond, !dbg !45
+
+for.cond:                                         ; preds = %for.inc, %if.then.2
+  %6 = load i32, i32* %u, align 4, !dbg !46
+  %7 = load i32, i32* %limit, align 4, !dbg !48
+  %cmp5 = icmp slt i32 %6, %7, !dbg !49
+  br i1 %cmp5, label %for.body, label %for.end, !dbg !50, !prof !80
+; CHECK: edge for.cond -> for.body probability is 0x73333333 / 0x80000000 = 90.00%
+; CHECK: edge for.cond -> for.end probability is 0x0ccccccd / 0x80000000 = 10.00%
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.dbg.declare(metadata double* %x, metadata !51, metadata !17), !dbg !53
+  %8 = load double, double* %s, align 8, !dbg !54
+  store double %8, double* %x, align 8, !dbg !53
+  %9 = load double, double* %x, align 8, !dbg !55
+  %add = fadd double %9, 3.049000e+00, !dbg !56
+  %10 = load i32, i32* %u, align 4, !dbg !57
+  %conv6 = sitofp i32 %10 to double, !dbg !57
+  %add7 = fadd double %add, %conv6, !dbg !58
+  store double %add7, double* %s, align 8, !dbg !59
+  %11 = load double, double* %s, align 8, !dbg !60
+  %12 = load double, double* %x, align 8, !dbg !61
+  %div = fdiv double 3.940000e+00, %12, !dbg !62
+  %mul8 = fmul double %div, 3.200000e-01, !dbg !63
+  %add9 = fadd double %11, %mul8, !dbg !64
+  %13 = load double, double* %s, align 8, !dbg !65
+  %sub = fsub double %13, %add9, !dbg !65
+  store double %sub, double* %s, align 8, !dbg !65
+  br label %for.inc, !dbg !66
+
+for.inc:                                          ; preds = %for.body
+  %14 = load i32, i32* %u, align 4, !dbg !67
+  %inc = add nsw i32 %14, 1, !dbg !67
+  store i32 %inc, i32* %u, align 4, !dbg !67
+  br label %for.cond, !dbg !68
+
+for.end:                                          ; preds = %for.cond
+  %15 = load double, double* %s, align 8, !dbg !69
+  store double %15, double* %result, align 8, !dbg !70
+  br label %if.end.13, !dbg !71
+
+if.else:                                          ; preds = %if.end
+  %16 = load i8**, i8*** %argv.addr, align 8, !dbg !72
+  %arrayidx10 = getelementptr inbounds i8*, i8** %16, i64 2, !dbg !72
+  %17 = load i8*, i8** %arrayidx10, align 8, !dbg !72
+  %call11 = call i32 @atoi(i8* %17) #4, !dbg !74
+  %conv12 = sitofp i32 %call11 to double, !dbg !74
+  store double %conv12, double* %result, align 8, !dbg !75
+  br label %if.end.13
+
+if.end.13:                                        ; preds = %if.else, %for.end
+  %18 = load double, double* %result, align 8, !dbg !76
+  %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i32 0, i32 0), double %18), !dbg !77
+  store i32 0, i32* %retval, align 4, !dbg !78
+  br label %return, !dbg !78
+
+return:                                           ; preds = %if.end.13, %if.then
+  %19 = load i32, i32* %retval, align 4, !dbg !79
+  ret i32 %19, !dbg !79
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @atoi(i8*) #2
+
+declare i32 @printf(i8*, ...) #3
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind readonly }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !3)
+!1 = !DIFile(filename: "test.cc", directory: "/ssd/llvm_commit")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 4, type: !7, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9, !10}
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64, align: 64)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64, align: 64)
+!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)"}
+!16 = !DILocalVariable(name: "argc", arg: 1, scope: !6, file: !1, line: 4, type: !9)
+!17 = !DIExpression()
+!18 = !DILocation(line: 4, column: 15, scope: !6)
+!19 = !DILocalVariable(name: "argv", arg: 2, scope: !6, file: !1, line: 4, type: !10)
+!20 = !DILocation(line: 4, column: 27, scope: !6)
+!21 = !DILocation(line: 5, column: 8, scope: !22)
+!22 = distinct !DILexicalBlock(scope: !6, file: !1, line: 5, column: 8)
+!23 = !DILocation(line: 5, column: 13, scope: !22)
+!24 = !DILocation(line: 5, column: 8, scope: !6)
+!25 = !DILocation(line: 6, column: 6, scope: !22)
+!26 = !DILocalVariable(name: "result", scope: !6, file: !1, line: 7, type: !4)
+!27 = !DILocation(line: 7, column: 11, scope: !6)
+!28 = !DILocalVariable(name: "limit", scope: !6, file: !1, line: 8, type: !9)
+!29 = !DILocation(line: 8, column: 8, scope: !6)
+!30 = !DILocation(line: 8, column: 21, scope: !6)
+!31 = !DILocation(line: 8, column: 16, scope: !6)
+!32 = !DILocation(line: 9, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(scope: !6, file: !1, line: 9, column: 8)
+!34 = !DILocation(line: 9, column: 14, scope: !33)
+!35 = !DILocation(line: 9, column: 8, scope: !6)
+!36 = !DILocalVariable(name: "s", scope: !37, file: !1, line: 10, type: !4)
+!37 = distinct !DILexicalBlock(scope: !33, file: !1, line: 9, column: 21)
+!38 = !DILocation(line: 10, column: 13, scope: !37)
+!39 = !DILocation(line: 10, column: 34, scope: !37)
+!40 = !DILocation(line: 10, column: 29, scope: !37)
+!41 = !DILocation(line: 10, column: 27, scope: !37)
+!42 = !DILocalVariable(name: "u", scope: !43, file: !1, line: 11, type: !9)
+!43 = distinct !DILexicalBlock(scope: !37, file: !1, line: 11, column: 6)
+!44 = !DILocation(line: 11, column: 15, scope: !43)
+!45 = !DILocation(line: 11, column: 11, scope: !43)
+!46 = !DILocation(line: 11, column: 22, scope: !47)
+!47 = distinct !DILexicalBlock(scope: !43, file: !1, line: 11, column: 6)
+!48 = !DILocation(line: 11, column: 26, scope: !47)
+!49 = !DILocation(line: 11, column: 24, scope: !47)
+!50 = !DILocation(line: 11, column: 6, scope: !43)
+!51 = !DILocalVariable(name: "x", scope: !52, file: !1, line: 12, type: !4)
+!52 = distinct !DILexicalBlock(scope: !47, file: !1, line: 11, column: 38)
+!53 = !DILocation(line: 12, column: 15, scope: !52)
+!54 = !DILocation(line: 12, column: 19, scope: !52)
+!55 = !DILocation(line: 13, column: 12, scope: !52)
+!56 = !DILocation(line: 13, column: 14, scope: !52)
+!57 = !DILocation(line: 13, column: 32, scope: !52)
+!58 = !DILocation(line: 13, column: 22, scope: !52)
+!59 = !DILocation(line: 13, column: 10, scope: !52)
+!60 = !DILocation(line: 14, column: 13, scope: !52)
+!61 = !DILocation(line: 14, column: 24, scope: !52)
+!62 = !DILocation(line: 14, column: 22, scope: !52)
+!63 = !DILocation(line: 14, column: 26, scope: !52)
+!64 = !DILocation(line: 14, column: 15, scope: !52)
+!65 = !DILocation(line: 14, column: 10, scope: !52)
+!66 = !DILocation(line: 15, column: 6, scope: !52)
+!67 = !DILocation(line: 11, column: 34, scope: !47)
+!68 = !DILocation(line: 11, column: 6, scope: !47)
+!69 = !DILocation(line: 16, column: 15, scope: !37)
+!70 = !DILocation(line: 16, column: 13, scope: !37)
+!71 = !DILocation(line: 17, column: 4, scope: !37)
+!72 = !DILocation(line: 18, column: 20, scope: !73)
+!73 = distinct !DILexicalBlock(scope: !33, file: !1, line: 17, column: 11)
+!74 = !DILocation(line: 18, column: 15, scope: !73)
+!75 = !DILocation(line: 18, column: 13, scope: !73)
+!76 = !DILocation(line: 20, column: 30, scope: !6)
+!77 = !DILocation(line: 20, column: 4, scope: !6)
+!78 = !DILocation(line: 21, column: 4, scope: !6)
+!79 = !DILocation(line: 22, column: 2, scope: !6)
+!80 = !{!"branch_weights", i32 90, i32 10}

Added: llvm/trunk/test/Transforms/SampleProfile/calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/calls.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/calls.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/calls.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,116 @@
+; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ test case
+;
+; #include <stdio.h>
+;
+; int sum(int x, int y) {
+;   return x + y;
+; }
+;
+; int main() {
+;   int s, i = 0;
+;   while (i++ < 20000 * 20000)
+;     if (i != 100) s = sum(i, s); else s = 30;
+;   printf("sum is %d\n", s);
+;   return 0;
+; }
+;
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %1 = load i32, i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %0, %1, !dbg !11
+  ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() !dbg !7 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !12
+  br label %while.cond, !dbg !13
+
+while.cond:                                       ; preds = %if.end, %entry
+  %0 = load i32, i32* %i, align 4, !dbg !14
+  %inc = add nsw i32 %0, 1, !dbg !14
+  store i32 %inc, i32* %i, align 4, !dbg !14
+  %cmp = icmp slt i32 %0, 400000000, !dbg !14
+  br i1 %cmp, label %while.body, label %while.end, !dbg !14
+; CHECK: edge while.cond -> while.body probability is 0x77f2798d / 0x80000000 = 93.71% [HOT edge]
+; CHECK: edge while.cond -> while.end probability is 0x080d8673 / 0x80000000 = 6.29%
+
+while.body:                                       ; preds = %while.cond
+  %1 = load i32, i32* %i, align 4, !dbg !16
+  %cmp1 = icmp ne i32 %1, 100, !dbg !16
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+; Without discriminator information, the profiler used to think that
+; both branches out of while.body had the same weight. In reality,
+; the edge while.body->if.then is taken most of the time.
+;
+; CHECK: edge while.body -> if.else probability is 0x0005b1e0 / 0x80000000 = 0.02%
+; CHECK: edge while.body -> if.then probability is 0x7ffa4e20 / 0x80000000 = 99.98% [HOT edge]
+
+
+if.then:                                          ; preds = %while.body
+  %2 = load i32, i32* %i, align 4, !dbg !18
+  %3 = load i32, i32* %s, align 4, !dbg !18
+  %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18
+  store i32 %call, i32* %s, align 4, !dbg !18
+  br label %if.end, !dbg !18
+
+if.else:                                          ; preds = %while.body
+  store i32 30, i32* %s, align 4, !dbg !20
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !22
+
+while.end:                                        ; preds = %while.cond
+  %4 = load i32, i32* %s, align 4, !dbg !24
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)

Added: llvm/trunk/test/Transforms/SampleProfile/cold-indirect-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/cold-indirect-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/cold-indirect-call.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/cold-indirect-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/cold-indirect-call.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/cold-indirect-call.prof -S | FileCheck %s
+
+define i32 @foo(i32 ()* %func) !dbg !3 {
+; CHECK: icmp {{.*}} @bar
+; CHECK-NOT: icmp {{.*}} @baz
+  %call = call i32 %func(), !dbg !4
+  ret i32 %call
+}
+
+define i32 @bar() !dbg !5 {
+  ret i32 41, !dbg !6
+}
+
+define i32 @baz() !dbg !7 {
+  ret i32 42, !dbg !8
+}
+
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1)
+!1 = !DIFile(filename: "foo.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, unit: !0)
+!4 = !DILocation(line: 5, scope: !3)
+!5 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, unit: !0)
+!6 = !DILocation(line: 9, scope: !5)
+!7 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 12, unit: !0)
+!8 = !DILocation(line: 13, scope: !7)

Added: llvm/trunk/test/Transforms/SampleProfile/compact-binary-profile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/compact-binary-profile.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/compact-binary-profile.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/compact-binary-profile.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,121 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.compactbinary.afdo -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.compactbinary.afdo -S | FileCheck %s
+
+; Original C++ test case
+;
+; #include <stdio.h>
+;
+; int sum(int x, int y) {
+;   return x + y;
+; }
+;
+; int main() {
+;   int s, i = 0;
+;   while (i++ < 20000 * 20000)
+;     if (i != 100) s = sum(i, s); else s = 30;
+;   printf("sum is %d\n", s);
+;   return 0;
+; }
+;
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Check sample-profile phase using compactbinary format profile will annotate
+; the IR with exactly the same result as using text format.
+; CHECK: br i1 %cmp, label %while.body, label %while.end{{.*}} !prof ![[IDX1:[0-9]*]]
+; CHECK: br i1 %cmp1, label %if.then, label %if.else{{.*}} !prof ![[IDX2:[0-9]*]]
+; CHECK: call i32 (i8*, ...) @printf{{.*}} !prof ![[IDX3:[0-9]*]]
+; CHECK: = !{!"TotalCount", i64 26781}
+; CHECK: = !{!"MaxCount", i64 5553}
+; CHECK: ![[IDX1]] = !{!"branch_weights", i32 5392, i32 163}
+; CHECK: ![[IDX2]] = !{!"branch_weights", i32 5280, i32 113}
+; CHECK: ![[IDX3]] = !{!"branch_weights", i32 1}
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %1 = load i32, i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %0, %1, !dbg !11
+  ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() !dbg !7 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !12
+  br label %while.cond, !dbg !13
+
+while.cond:                                       ; preds = %if.end, %entry
+  %0 = load i32, i32* %i, align 4, !dbg !14
+  %inc = add nsw i32 %0, 1, !dbg !14
+  store i32 %inc, i32* %i, align 4, !dbg !14
+  %cmp = icmp slt i32 %0, 400000000, !dbg !14
+  br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body:                                       ; preds = %while.cond
+  %1 = load i32, i32* %i, align 4, !dbg !16
+  %cmp1 = icmp ne i32 %1, 100, !dbg !16
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+
+if.then:                                          ; preds = %while.body
+  %2 = load i32, i32* %i, align 4, !dbg !18
+  %3 = load i32, i32* %s, align 4, !dbg !18
+  %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18
+  store i32 %call, i32* %s, align 4, !dbg !18
+  br label %if.end, !dbg !18
+
+if.else:                                          ; preds = %while.body
+  store i32 30, i32* %s, align 4, !dbg !20
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !22
+
+while.end:                                        ; preds = %while.cond
+  %4 = load i32, i32* %s, align 4, !dbg !24
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)

Added: llvm/trunk/test/Transforms/SampleProfile/cov-zero-samples.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/cov-zero-samples.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/cov-zero-samples.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/cov-zero-samples.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,147 @@
+; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/cov-zero-samples.prof -sample-profile-check-record-coverage=100 -pass-remarks=sample-profile -pass-remarks-analysis=sample-profile -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/cov-zero-samples.prof -sample-profile-check-record-coverage=100 -pass-remarks=sample-profile -pass-remarks-analysis=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; CHECK: remark: cov-zero-samples.cc:9:29: Applied 404065 samples from profile (offset: 2.1)
+; CHECK: remark: cov-zero-samples.cc:10:9: Applied 443089 samples from profile (offset: 3)
+; CHECK: remark: cov-zero-samples.cc:10:36: Applied 0 samples from profile (offset: 3.1)
+; CHECK: remark: cov-zero-samples.cc:11:12: Applied 404066 samples from profile (offset: 4)
+; CHECK: remark: cov-zero-samples.cc:13:25: Applied 0 samples from profile (offset: 6)
+; CHECK: remark: cov-zero-samples.cc:14:3: Applied 0 samples from profile (offset: 7)
+; CHECK: remark: cov-zero-samples.cc:10:9: most popular destination for conditional branches at cov-zero-samples.cc:9:3
+; CHECK: remark: cov-zero-samples.cc:11:12: most popular destination for conditional branches at cov-zero-samples.cc:10:9
+;
+; Coverage for this profile should be 100%
+; CHECK-NOT: warning: cov-zero-samples.cc:1:
+
+source_filename = "test/Transforms/SampleProfile/cov-zero-samples.ll"
+
+@N = global i64 8000000000, align 8, !dbg !0  ; loop bound; @main reads it with volatile loads
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1  ; format string passed to printf in @main
+
+define i32 @_Z12never_calledi(i32 %i) !dbg !11 {  ; ignores %i; its only call site in this file is @main's if.then block
+entry:
+  ret i32 0, !dbg !15  ; always returns 0
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+
+define i32 @main() !dbg !17 {  ; loops while %i < @N, updating %sum each iteration, then prints %sum and returns 0
+entry:
+  %retval = alloca i32, align 4
+  %sum = alloca i32, align 4
+  %i = alloca i64, align 8
+  store i32 0, i32* %retval, align 4
+  call void @llvm.dbg.declare(metadata i32* %sum, metadata !20, metadata !21), !dbg !22
+  store i32 0, i32* %sum, align 4, !dbg !22  ; sum = 0
+  call void @llvm.dbg.declare(metadata i64* %i, metadata !23, metadata !21), !dbg !25
+  store i64 0, i64* %i, align 8, !dbg !25  ; i = 0
+  br label %for.cond, !dbg !26
+
+for.cond:                                         ; preds = %for.inc, %entry
+
+  %0 = load i64, i64* %i, align 8, !dbg !27
+  %1 = load volatile i64, i64* @N, align 8, !dbg !30  ; volatile load: @N is re-read on every iteration
+  %cmp = icmp slt i64 %0, %1, !dbg !31  ; i < N
+  br i1 %cmp, label %for.body, label %for.end, !dbg !32
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i64, i64* %i, align 8, !dbg !33
+  %3 = load volatile i64, i64* @N, align 8, !dbg !36
+  %cmp1 = icmp sgt i64 %2, %3, !dbg !37  ; i > N; presumably never true here because for.cond just saw i < N (N is volatile, so not provable)
+  br i1 %cmp1, label %if.then, label %if.end, !dbg !38
+
+if.then:                                          ; preds = %for.body
+  %4 = load i64, i64* %i, align 8, !dbg !39  ; !39 is line 10, column 36, the location the header expects 0 samples for
+  %conv = trunc i64 %4 to i32, !dbg !39
+  %call = call i32 @_Z12never_calledi(i32 %conv), !dbg !41
+  %5 = load i32, i32* %sum, align 4, !dbg !42
+  %add = add nsw i32 %5, %call, !dbg !42
+  store i32 %add, i32* %sum, align 4, !dbg !42  ; sum += never_called((int)i)
+  br label %if.end, !dbg !43
+
+if.end:                                           ; preds = %if.then, %for.body
+  %6 = load i64, i64* %i, align 8, !dbg !44
+  %div = sdiv i64 %6, 239, !dbg !45  ; i / 239
+  %7 = load i32, i32* %sum, align 4, !dbg !46
+  %conv2 = sext i32 %7 to i64, !dbg !46
+  %mul = mul nsw i64 %conv2, %div, !dbg !46  ; sum * (i / 239), computed in 64 bits
+  %conv3 = trunc i64 %mul to i32, !dbg !46
+  store i32 %conv3, i32* %sum, align 4, !dbg !46  ; truncated product is written back to sum
+  br label %for.inc, !dbg !47
+
+for.inc:                                          ; preds = %if.end
+  %8 = load i64, i64* %i, align 8, !dbg !48
+  %inc = add nsw i64 %8, 1, !dbg !48
+  store i64 %inc, i64* %i, align 8, !dbg !48  ; ++i
+  br label %for.cond, !dbg !50
+
+for.end:                                          ; preds = %for.cond
+  %9 = load i32, i32* %sum, align 4, !dbg !51
+  %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %9), !dbg !52  ; prints sum using the @.str format
+  ret i32 0, !dbg !53
+}
+
+declare i32 @printf(i8*, ...)
+
+attributes #0 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = !DIGlobalVariable(name: "N", scope: !2, file: !3, line: 3, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !4, globals: !5)
+!3 = !DIFile(filename: "cov-zero-samples.cc", directory: ".")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7)
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)"}
+!11 = distinct !DISubprogram(name: "never_called", linkageName: "_Z12never_calledi", scope: !3, file: !3, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !2, retainedNodes: !4)
+!12 = !DISubroutineType(types: !13)
+!13 = !{!14, !14}
+!14 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!15 = !DILocation(line: 5, column: 27, scope: !16)
+!16 = !DILexicalBlockFile(scope: !11, file: !3, discriminator: 6)
+!17 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 7, type: !18, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !2, retainedNodes: !4)
+!18 = !DISubroutineType(types: !19)
+!19 = !{!14}
+!20 = !DILocalVariable(name: "sum", scope: !17, file: !3, line: 8, type: !14)
+!21 = !DIExpression()
+!22 = !DILocation(line: 8, column: 7, scope: !17)
+!23 = !DILocalVariable(name: "i", scope: !24, file: !3, line: 9, type: !7)
+!24 = distinct !DILexicalBlock(scope: !17, file: !3, line: 9, column: 3)
+!25 = !DILocation(line: 9, column: 18, scope: !24)
+!26 = !DILocation(line: 9, column: 8, scope: !24)
+!27 = !DILocation(line: 9, column: 25, scope: !28)
+!28 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
+!29 = distinct !DILexicalBlock(scope: !24, file: !3, line: 9, column: 3)
+!30 = !DILocation(line: 9, column: 29, scope: !28)
+!31 = !DILocation(line: 9, column: 27, scope: !28)
+!32 = !DILocation(line: 9, column: 3, scope: !28)
+!33 = !DILocation(line: 10, column: 9, scope: !34)
+!34 = distinct !DILexicalBlock(scope: !35, file: !3, line: 10, column: 9)
+!35 = distinct !DILexicalBlock(scope: !29, file: !3, line: 9, column: 37)
+!36 = !DILocation(line: 10, column: 13, scope: !34)
+!37 = !DILocation(line: 10, column: 11, scope: !34)
+!38 = !DILocation(line: 10, column: 9, scope: !35)
+!39 = !DILocation(line: 10, column: 36, scope: !40)
+!40 = !DILexicalBlockFile(scope: !34, file: !3, discriminator: 2)
+!41 = !DILocation(line: 10, column: 23, scope: !40)
+!42 = !DILocation(line: 10, column: 20, scope: !40)
+!43 = !DILocation(line: 10, column: 16, scope: !40)
+!44 = !DILocation(line: 11, column: 12, scope: !35)
+!45 = !DILocation(line: 11, column: 14, scope: !35)
+!46 = !DILocation(line: 11, column: 9, scope: !35)
+!47 = !DILocation(line: 12, column: 3, scope: !35)
+!48 = !DILocation(line: 9, column: 33, scope: !49)
+!49 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 4)
+!50 = !DILocation(line: 9, column: 3, scope: !49)
+!51 = !DILocation(line: 13, column: 25, scope: !17)
+!52 = !DILocation(line: 13, column: 3, scope: !17)
+!53 = !DILocation(line: 14, column: 3, scope: !17)
+

Added: llvm/trunk/test/Transforms/SampleProfile/coverage-warning.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/coverage-warning.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/coverage-warning.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/coverage-warning.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s
+define i32 @foo(i32 %i) !dbg !4 {  ; returns 30 when %i > 1000, otherwise 3
+; The profile has samples for line locations that are no longer present.
+; Coverage does not reach 90%, so we should get this warning:
+;
+; CHECK: warning: coverage-warning.c:1: 2 of 3 available profile records (66%) were applied
+; CHECK: warning: coverage-warning.c:1: 29000 of 30700 available profile samples (94%) were applied
+entry:
+  %retval = alloca i32, align 4
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4, !dbg !9
+  %cmp = icmp sgt i32 %0, 1000, !dbg !10  ; i > 1000
+  br i1 %cmp, label %if.then, label %if.end, !dbg !9
+
+if.then:                                          ; preds = %entry
+  store i32 30, i32* %retval, align 4, !dbg !11  ; result for i > 1000
+  br label %return, !dbg !11
+
+if.end:                                           ; preds = %entry
+  store i32 3, i32* %retval, align 4, !dbg !12  ; result otherwise
+  br label %return, !dbg !12
+
+return:                                           ; preds = %if.end, %if.then
+  %1 = load i32, i32* %retval, align 4, !dbg !13  ; both paths return through the %retval slot
+  ret i32 %1, !dbg !13
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
+!1 = !DIFile(filename: "coverage-warning.c", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !2)
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{!"clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)"}
+!9 = !DILocation(line: 2, column: 7, scope: !4)
+!10 = !DILocation(line: 2, column: 9, scope: !4)
+!11 = !DILocation(line: 3, column: 5, scope: !4)
+!12 = !DILocation(line: 4, column: 3, scope: !4)
+!13 = !DILocation(line: 5, column: 1, scope: !4)

Added: llvm/trunk/test/Transforms/SampleProfile/discriminator.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/discriminator.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/discriminator.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/discriminator.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,90 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original code
+;
+; 1   int foo(int i) {
+; 2     int x = 0;
+; 3     while (i < 100) {
+; 4       if (i < 5) x--;
+; 5       i++;
+; 6     }
+; 7     return x;
+; 8   }
+;
+; In this test, if the loop is executed 100 times, the decrement operation
+; at line 4 should only execute 5 times. This is reflected in the profile
+; data for line offset 3.  In Inputs/discriminator.prof, we have:
+;
+; 3: 100
+; 3.1: 5
+;
+; This means that the predicate 'i < 5' (line 3) is executed 100 times,
+; but the then branch (line 3.1) is only executed 5 times.
+
+define i32 @foo(i32 %i) #0 !dbg !4 {  ; loops while i < 100, decrementing x when the inner compare holds, and returns x
+; CHECK: Printing analysis 'Branch Probability Analysis' for function 'foo':
+entry:
+  %i.addr = alloca i32, align 4
+  %x = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 0, i32* %x, align 4, !dbg !10  ; x = 0
+  br label %while.cond, !dbg !11
+
+while.cond:                                       ; preds = %if.end, %entry
+  %0 = load i32, i32* %i.addr, align 4, !dbg !12
+  %cmp = icmp slt i32 %0, 100, !dbg !12  ; i < 100
+  br i1 %cmp, label %while.body, label %while.end, !dbg !12
+; CHECK: edge while.cond -> while.body probability is 0x7d83ba68 / 0x80000000 = 98.06% [HOT edge]
+; CHECK: edge while.cond -> while.end probability is 0x027c4598 / 0x80000000 = 1.94%
+
+while.body:                                       ; preds = %while.cond
+  %1 = load i32, i32* %i.addr, align 4, !dbg !14
+  %cmp1 = icmp slt i32 %1, 50, !dbg !14  ; NOTE(review): compares against 50, while the C listing in the file header shows `i < 5` - confirm which is intended
+  br i1 %cmp1, label %if.then, label %if.end, !dbg !14
+; CHECK: edge while.body -> if.then probability is 0x07878788 / 0x80000000 = 5.88%
+; CHECK: edge while.body -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge]
+
+if.then:                                          ; preds = %while.body
+  %2 = load i32, i32* %x, align 4, !dbg !17  ; !17 sits in the discriminator-2 scope !18
+  %dec = add nsw i32 %2, -1, !dbg !17
+  store i32 %dec, i32* %x, align 4, !dbg !17  ; x--
+  br label %if.end, !dbg !17
+
+if.end:                                           ; preds = %if.then, %while.body
+  %3 = load i32, i32* %i.addr, align 4, !dbg !19
+  %inc = add nsw i32 %3, 1, !dbg !19
+  store i32 %inc, i32* %i.addr, align 4, !dbg !19  ; i++
+  br label %while.cond, !dbg !20
+
+while.end:                                        ; preds = %while.cond
+  %4 = load i32, i32* %x, align 4, !dbg !21
+  ret i32 %4, !dbg !21
+}
+
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "discriminator.c", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "discriminator.c", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.5 "}
+!10 = !DILocation(line: 2, scope: !4)
+!11 = !DILocation(line: 3, scope: !4)
+!12 = !DILocation(line: 3, scope: !13)
+!13 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !4)
+!14 = !DILocation(line: 4, scope: !15)
+!15 = distinct !DILexicalBlock(line: 4, column: 0, file: !1, scope: !16)
+!16 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
+!17 = !DILocation(line: 4, scope: !18)
+!18 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !15)
+!19 = !DILocation(line: 5, scope: !16)
+!20 = !DILocation(line: 6, scope: !16)
+!21 = !DILocation(line: 7, scope: !4)

Added: llvm/trunk/test/Transforms/SampleProfile/early-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/early-inline.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/early-inline.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/early-inline.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,76 @@
+; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/einline.prof -S | FileCheck %s
+
+; Checks if both call and invoke can be inlined early if their inlined
+; instances are hot in profile.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@_ZTIi = external constant i8*  ; referenced by the landingpad catch clause in @_Z3foov
+
+; Function Attrs: uwtable
+define void @_Z3foov() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !6 {  ; calls @no_inline once, then @_ZL3barv via a call and via an invoke
+  %1 = alloca i8*
+  %2 = alloca i32
+  %3 = alloca i32, align 4
+; CHECK: call void @no_inline
+  call void @no_inline(), !dbg !16  ; callee uses attribute group #1, this function uses #0
+; CHECK-NOT: call
+  call void @_ZL3barv(), !dbg !9  ; plain call site of the internal callee
+; CHECK-NOT: invoke
+  invoke void @_ZL3barv()
+          to label %4 unwind label %5, !dbg !10  ; invoke site of the same callee; %4 is the normal path, %5 the unwind path
+
+; <label>:4:
+  ret void
+
+; <label>:5:
+  %6 = landingpad { i8*, i32 }
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define internal void @_ZL3barv() #0 !dbg !12 {  ; empty internal callee; @_Z3foov reaches it through one call and one invoke
+  ret void
+}
+
+; CHECK-LABEL: @recursive
+define void @recursive() #0 !dbg !13 {  ; calls itself twice, then returns
+; Recursive calls should not be early-inlined.
+; CHECK-NOT: call void @recursive
+; CHECK: call void @recursive
+; CHECK: call void @recursive
+; CHECK-NOT: call void @recursive
+; CHECK: ret
+  call void @recursive(), !dbg !14  ; first self-call (line 21 in the debug info)
+  call void @recursive(), !dbg !15  ; second self-call (line 22 in the debug info)
+  ret void
+}
+
+; The callee's attributes do not match the caller's, so it should not be inlined.
+define void @no_inline() #1 !dbg !17 {  ; #1 is target-features +sse4.2, whereas its caller @_Z3foov uses #0 (+sse4.1)
+  ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+attributes #0 = {"target-features"="+sse4.1"}
+attributes #1 = {"target-features"="+sse4.2"}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1)
+!1 = !DIFile(filename: "a", directory: "b/")
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = distinct !DISubprogram(linkageName: "_Z3foov", scope: !1, file: !1, line: 5, scopeLine: 5, unit: !0)
+!9 = !DILocation(line: 6, column: 3, scope: !6)
+!10 = !DILocation(line: 8, column: 5, scope: !11)
+!11 = distinct !DILexicalBlock(scope: !6, file: !1, line: 7, column: 7)
+!12 = distinct !DISubprogram(linkageName: "_ZL3barv", scope: !1, file: !1, line: 20, scopeLine: 20, unit: !0)
+!13 = distinct !DISubprogram(linkageName: "recursive", scope: !1, file: !1, line: 20, scopeLine: 20, unit: !0)
+!14 = !DILocation(line: 21, column: 3, scope: !13)
+!15 = !DILocation(line: 22, column: 3, scope: !13)
+!16 = !DILocation(line: 7, column: 3, scope: !6)
+!17 = distinct !DISubprogram(linkageName: "no_inline", scope: !1, file: !1, line: 20, scopeLine: 20, unit: !0)

Added: llvm/trunk/test/Transforms/SampleProfile/entry_counts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/entry_counts.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/entry_counts.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/entry_counts.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/entry_counts.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/entry_counts.prof -S | FileCheck %s
+
+; According to the profile, function empty() was called 13,293 times; the expected entry count below is one higher (13294).
+; CHECK: {{.*}} = !{!"function_entry_count", i64 13294}
+
+define void @empty() !dbg !4 {  ; empty body; carries debug info (!4), unlike @no_profile below
+entry:
+  ret void, !dbg !9
+}
+
+; This function has no profile; check that its function_entry_count is -1.
+; CHECK: {{.*}} = !{!"function_entry_count", i64 -1}
+define void @no_profile() {  ; no !dbg attachment on the function or its instructions
+entry:
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "entry_counts.c", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !2)
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{!"clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)"}
+!9 = !DILocation(line: 1, column: 15, scope: !4)

Added: llvm/trunk/test/Transforms/SampleProfile/entry_counts_cold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/entry_counts_cold.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/entry_counts_cold.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/entry_counts_cold.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,170 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/entry_counts_cold.prof -S | FileCheck %s
+; ModuleID = 'temp.bc'
+source_filename = "temp.c"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; Function Attrs: nounwind ssp uwtable
+; CHECK: define i32 @top({{.*}} !prof [[TOP:![0-9]+]] 
+define i32 @top(i32* %p) #0 !dbg !8 {  ; calls @foo(p), then @bar(p) only when *p != 0; always returns 0
+entry:
+  %p.addr = alloca i32*, align 8
+  store i32* %p, i32** %p.addr, align 8, !tbaa !15
+  call void @llvm.dbg.declare(metadata i32** %p.addr, metadata !14, metadata !DIExpression()), !dbg !19
+  %0 = load i32*, i32** %p.addr, align 8, !dbg !20, !tbaa !15
+  %call = call i32 @foo(i32* %0), !dbg !21  ; result %call is not used
+; foo is inlined
+; CHECK-NOT: call i32 @foo
+; CHECK: call i32 @bar
+  %1 = load i32*, i32** %p.addr, align 8, !dbg !22, !tbaa !15
+  %2 = load i32, i32* %1, align 4, !dbg !24, !tbaa !25  ; *p
+  %tobool = icmp ne i32 %2, 0, !dbg !24  ; *p != 0
+  br i1 %tobool, label %if.then, label %if.end, !dbg !27
+
+if.then:                                          ; preds = %entry
+  %3 = load i32*, i32** %p.addr, align 8, !dbg !28, !tbaa !15
+; bar is not inlined
+; CHECK: call i32 @bar
+  %call1 = call i32 @bar(i32* %3), !dbg !29  ; result %call1 is not used
+  br label %if.end, !dbg !29
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 0, !dbg !30
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind ssp uwtable
+; CHECK: define i32 @foo({{.*}} !prof [[FOO:![0-9]+]] 
+define i32 @foo(i32* %p) #0 !dbg !31 {  ; p[2] += p[3]; a = bar(p); returns a + 1
+entry:
+  %p.addr = alloca i32*, align 8
+  %a = alloca i32, align 4
+  store i32* %p, i32** %p.addr, align 8, !tbaa !15
+  call void @llvm.dbg.declare(metadata i32** %p.addr, metadata !33, metadata !DIExpression()), !dbg !35
+  %0 = bitcast i32* %a to i8*, !dbg !36
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4, !dbg !36  ; lifetime of the 4-byte slot %a begins
+  call void @llvm.dbg.declare(metadata i32* %a, metadata !34, metadata !DIExpression()), !dbg !37
+  %1 = load i32*, i32** %p.addr, align 8, !dbg !38, !tbaa !15
+  %arrayidx = getelementptr inbounds i32, i32* %1, i64 3, !dbg !38  ; &p[3]
+  %2 = load i32, i32* %arrayidx, align 4, !dbg !38, !tbaa !25
+  %3 = load i32*, i32** %p.addr, align 8, !dbg !39, !tbaa !15
+  %arrayidx1 = getelementptr inbounds i32, i32* %3, i64 2, !dbg !39  ; &p[2]
+  %4 = load i32, i32* %arrayidx1, align 4, !dbg !40, !tbaa !25
+  %add = add nsw i32 %4, %2, !dbg !40  ; p[2] + p[3]
+  store i32 %add, i32* %arrayidx1, align 4, !dbg !40, !tbaa !25  ; p[2] = p[2] + p[3]
+  %5 = load i32*, i32** %p.addr, align 8, !dbg !41, !tbaa !15
+  %call = call i32 @bar(i32* %5), !dbg !42
+  store i32 %call, i32* %a, align 4, !dbg !43, !tbaa !25  ; a = bar(p)
+  %6 = load i32, i32* %a, align 4, !dbg !44, !tbaa !25
+  %add2 = add nsw i32 %6, 1, !dbg !45  ; a + 1
+  %7 = bitcast i32* %a to i8*, !dbg !46
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %7) #4, !dbg !46  ; lifetime of %a ends
+  ret i32 %add2, !dbg !47
+}
+
+; Function Attrs: nounwind ssp uwtable
+; CHECK: define i32 @bar({{.*}} !prof [[BAR:![0-9]+]] 
+define i32 @bar(i32* %p) #0 !dbg !48 {  ; calls @baz(), does p[1] += p[2], and returns p[3]
+entry:
+  %p.addr = alloca i32*, align 8
+  store i32* %p, i32** %p.addr, align 8, !tbaa !15
+  call void @llvm.dbg.declare(metadata i32** %p.addr, metadata !50, metadata !DIExpression()), !dbg !51
+  ; CHECK: call void (...) @baz{{.*}} !prof [[BAZ:![0-9]+]]
+  call void (...) @baz(), !dbg !52  ; @baz is only declared in this file (variadic, no body)
+  %0 = load i32*, i32** %p.addr, align 8, !dbg !53, !tbaa !15
+  %arrayidx = getelementptr inbounds i32, i32* %0, i64 2, !dbg !53  ; &p[2]
+  %1 = load i32, i32* %arrayidx, align 4, !dbg !53, !tbaa !25
+  %2 = load i32*, i32** %p.addr, align 8, !dbg !54, !tbaa !15
+  %arrayidx1 = getelementptr inbounds i32, i32* %2, i64 1, !dbg !54  ; &p[1]
+  %3 = load i32, i32* %arrayidx1, align 4, !dbg !55, !tbaa !25
+  %add = add nsw i32 %3, %1, !dbg !55  ; p[1] + p[2]
+  store i32 %add, i32* %arrayidx1, align 4, !dbg !55, !tbaa !25  ; p[1] = p[1] + p[2]
+  %4 = load i32*, i32** %p.addr, align 8, !dbg !56, !tbaa !15
+  %arrayidx2 = getelementptr inbounds i32, i32* %4, i64 3, !dbg !56  ; &p[3]
+  %5 = load i32, i32* %arrayidx2, align 4, !dbg !56, !tbaa !25
+  ret i32 %5, !dbg !57
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
+
+declare void @baz(...) #3
+
+attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+; CHECK: [[TOP]] = !{!"function_entry_count", i64 101}
+; CHECK: [[FOO]] = !{!"function_entry_count", i64 151}
+; CHECK: [[BAR]] = !{!"function_entry_count", i64 303}
+; CHECK: [[BAZ]] = !{!"branch_weights", i64 303}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: GNU)
+!1 = !DIFile(filename: "temp.c", directory: "llvm/test/Transforms/SampleProfile")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!"clang version 8.0.0"}
+!8 = distinct !DISubprogram(name: "top", scope: !1, file: !1, line: 5, type: !9, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11, !12}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!13 = !{!14}
+!14 = !DILocalVariable(name: "p", arg: 1, scope: !8, file: !1, line: 5, type: !12)
+!15 = !{!16, !16, i64 0}
+!16 = !{!"any pointer", !17, i64 0}
+!17 = !{!"omnipotent char", !18, i64 0}
+!18 = !{!"Simple C/C++ TBAA"}
+!19 = !DILocation(line: 5, column: 14, scope: !8)
+!20 = !DILocation(line: 6, column: 7, scope: !8)
+!21 = !DILocation(line: 6, column: 3, scope: !8)
+!22 = !DILocation(line: 7, column: 8, scope: !23)
+!23 = distinct !DILexicalBlock(scope: !8, file: !1, line: 7, column: 7)
+!24 = !DILocation(line: 7, column: 7, scope: !23)
+!25 = !{!26, !26, i64 0}
+!26 = !{!"int", !17, i64 0}
+!27 = !DILocation(line: 7, column: 7, scope: !8)
+!28 = !DILocation(line: 8, column: 9, scope: !23)
+!29 = !DILocation(line: 8, column: 5, scope: !23)
+!30 = !DILocation(line: 9, column: 3, scope: !8)
+!31 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 12, type: !9, scopeLine: 12, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !32)
+!32 = !{!33, !34}
+!33 = !DILocalVariable(name: "p", arg: 1, scope: !31, file: !1, line: 12, type: !12)
+!34 = !DILocalVariable(name: "a", scope: !31, file: !1, line: 13, type: !11)
+!35 = !DILocation(line: 12, column: 14, scope: !31)
+!36 = !DILocation(line: 13, column: 3, scope: !31)
+!37 = !DILocation(line: 13, column: 7, scope: !31)
+!38 = !DILocation(line: 14, column: 11, scope: !31)
+!39 = !DILocation(line: 14, column: 3, scope: !31)
+!40 = !DILocation(line: 14, column: 8, scope: !31)
+!41 = !DILocation(line: 15, column: 11, scope: !31)
+!42 = !DILocation(line: 15, column: 7, scope: !31)
+!43 = !DILocation(line: 15, column: 5, scope: !31)
+!44 = !DILocation(line: 16, column: 10, scope: !31)
+!45 = !DILocation(line: 16, column: 11, scope: !31)
+!46 = !DILocation(line: 17, column: 1, scope: !31)
+!47 = !DILocation(line: 16, column: 3, scope: !31)
+!48 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 19, type: !9, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !49)
+!49 = !{!50}
+!50 = !DILocalVariable(name: "p", arg: 1, scope: !48, file: !1, line: 19, type: !12)
+!51 = !DILocation(line: 19, column: 15, scope: !48)
+!52 = !DILocation(line: 20, column: 3, scope: !48)
+!53 = !DILocation(line: 21, column: 11, scope: !48)
+!54 = !DILocation(line: 21, column: 3, scope: !48)
+!55 = !DILocation(line: 21, column: 8, scope: !48)
+!56 = !DILocation(line: 22, column: 10, scope: !48)
+!57 = !DILocation(line: 22, column: 3, scope: !48)

Added: llvm/trunk/test/Transforms/SampleProfile/flattened.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/flattened.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/flattened.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/flattened.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; Check flattened profile will not be read in thinlto postlink.
+; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -perform-thinlto=true -S | FileCheck %s
+; RUN: opt < %s -passes='thinlto<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s
+;
+; Check flattened profile will be read in thinlto prelink.
+; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -prepare-for-thinlto=true -S | FileCheck %s --check-prefix=PRELINK
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=PRELINK
+;
+; Check flattened profile will be read in non-thinlto mode.
+; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -S | FileCheck %s --check-prefix=NOTHINLTO
+; RUN: opt < %s -passes='default<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=NOTHINLTO
+;
+; CHECK-NOT: !{!"ProfileFormat", !"SampleProfile"}
+; PRELINK:   !{!"ProfileFormat", !"SampleProfile"}
+; NOTHINLTO: !{!"ProfileFormat", !"SampleProfile"}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define dso_local i32 @foo() local_unnamed_addr !dbg !7 {  ; the only function in this test module
+entry:
+  ret i32 -1, !dbg !9  ; always returns -1
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 345241)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "a.c", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 8.0.0 (trunk 345241)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 2, column: 3, scope: !7)

Added: llvm/trunk/test/Transforms/SampleProfile/fnptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/fnptr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/fnptr.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/fnptr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,157 @@
+; The two profiles used in this test are the same but encoded in different
+; formats. This checks that we produce the same profile annotations regardless
+; of the profile format.
+;
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s
+
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s
+
+; CHECK:   edge for.body3 -> if.then probability is 0x1a56a56a / 0x80000000 = 20.58%
+; CHECK:   edge for.body3 -> if.else probability is 0x65a95a96 / 0x80000000 = 79.42%
+; CHECK:   edge for.inc -> for.inc12 probability is 0x000fbd1c / 0x80000000 = 0.05%
+; CHECK:   edge for.inc -> for.body3 probability is 0x7ff042e4 / 0x80000000 = 99.95%
+; CHECK:   edge for.inc12 -> for.end14 probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK:   edge for.inc12 -> for.cond1.preheader probability is 0x7c000000 / 0x80000000 = 96.88%
+
+; Original C++ test case.
+;
+; #include <stdlib.h>
+; #include <math.h>
+; #include <stdio.h>
+;
+; #define N 10000
+; #define M 6000
+;
+; double foo(int x) {
+;   return x * sin((double)x);
+; }
+;
+; double bar(int x) {
+;   return x - cos((double)x);
+; }
+;
+; int main() {
+;   double (*fptr)(int);
+;   double S = 0;
+;   for (int i = 0; i < N; i++)
+;     for (int j = 0; j < M; j++) {
+;       fptr = (rand() % 100 < 30) ? foo : bar;
+;       if (rand() % 100 < 10)
+;         S += (*fptr)(i + j * 300);
+;       else
+;         S += (*fptr)(i - j / 840);
+;     }
+;   printf("S = %lf\n", S);
+;   return 0;
+; }
+
+@.str = private unnamed_addr constant [9 x i8] c"S = %lf\0A\00", align 1
+
+define double @_Z3fooi(i32 %x) #0 !dbg !3 {
+entry:
+  %conv = sitofp i32 %x to double, !dbg !2
+  %call = tail call double @sin(double %conv) #3, !dbg !8
+  %mul = fmul double %conv, %call, !dbg !8
+  ret double %mul, !dbg !8
+}
+
+declare double @sin(double) #1
+
+define double @_Z3bari(i32 %x) #0 !dbg !10 {
+entry:
+  %conv = sitofp i32 %x to double, !dbg !9
+  %call = tail call double @cos(double %conv) #3, !dbg !11
+  %sub = fsub double %conv, %call, !dbg !11
+  ret double %sub, !dbg !11
+}
+
+declare double @cos(double) #1
+
+define i32 @main() #2 !dbg !13 {
+entry:
+  br label %for.cond1.preheader, !dbg !12
+
+for.cond1.preheader:                              ; preds = %for.inc12, %entry
+  %i.025 = phi i32 [ 0, %entry ], [ %inc13, %for.inc12 ]
+  %S.024 = phi double [ 0.000000e+00, %entry ], [ %S.2.lcssa, %for.inc12 ]
+  br label %for.body3, !dbg !14
+
+for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
+  %j.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.inc ]
+  %S.122 = phi double [ %S.024, %for.cond1.preheader ], [ %S.2, %for.inc ]
+  %call = tail call i32 @rand() #3, !dbg !15
+  %rem = srem i32 %call, 100, !dbg !15
+  %cmp4 = icmp slt i32 %rem, 30, !dbg !15
+  %_Z3fooi._Z3bari = select i1 %cmp4, double (i32)* @_Z3fooi, double (i32)* @_Z3bari, !dbg !15
+  %call5 = tail call i32 @rand() #3, !dbg !16
+  %rem6 = srem i32 %call5, 100, !dbg !16
+  %cmp7 = icmp slt i32 %rem6, 10, !dbg !16
+  br i1 %cmp7, label %if.then, label %if.else, !dbg !16
+
+if.then:                                          ; preds = %for.body3
+  %mul = mul nsw i32 %j.023, 300, !dbg !18
+  %add = add nsw i32 %mul, %i.025, !dbg !18
+  %call8 = tail call double %_Z3fooi._Z3bari(i32 %add), !dbg !18
+  br label %for.inc, !dbg !18
+
+if.else:                                          ; preds = %for.body3
+  %div = sdiv i32 %j.023, 840, !dbg !19
+  %sub = sub nsw i32 %i.025, %div, !dbg !19
+  %call10 = tail call double %_Z3fooi._Z3bari(i32 %sub), !dbg !19
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %if.else
+  %call8.pn = phi double [ %call8, %if.then ], [ %call10, %if.else ]
+  %S.2 = fadd double %S.122, %call8.pn, !dbg !18
+  %inc = add nsw i32 %j.023, 1, !dbg !20
+  %exitcond = icmp eq i32 %j.023, 5999, !dbg !14
+  br i1 %exitcond, label %for.inc12, label %for.body3, !dbg !14
+
+for.inc12:                                        ; preds = %for.inc
+  %S.2.lcssa = phi double [ %S.2, %for.inc ]
+  %inc13 = add nsw i32 %i.025, 1, !dbg !22
+  %exitcond26 = icmp eq i32 %i.025, 9999, !dbg !12
+  br i1 %exitcond26, label %for.end14, label %for.cond1.preheader, !dbg !12
+
+for.end14:                                        ; preds = %for.inc12
+  %S.2.lcssa.lcssa = phi double [ %S.2.lcssa, %for.inc12 ]
+  %call15 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), double %S.2.lcssa.lcssa), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+; Function Attrs: nounwind
+declare i32 @rand() #1
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+!llvm.dbg.cu = !{!26}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{!"clang version 3.6.0 "}
+!2 = !DILocation(line: 9, column: 3, scope: !3)
+!3 = distinct !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !26, scopeLine: 8, file: !4, scope: !5, type: !6, retainedNodes: !7)
+!4 = !DIFile(filename: "fnptr.cc", directory: ".")
+!5 = !DIFile(filename: "fnptr.cc", directory: ".")
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !DILocation(line: 9, column: 14, scope: !3)
+!9 = !DILocation(line: 13, column: 3, scope: !10)
+!10 = distinct !DISubprogram(name: "bar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !26, scopeLine: 12, file: !4, scope: !5, type: !6, retainedNodes: !7)
+!11 = !DILocation(line: 13, column: 14, scope: !10)
+!12 = !DILocation(line: 19, column: 3, scope: !13)
+!13 = distinct !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !26, scopeLine: 16, file: !4, scope: !5, type: !6, retainedNodes: !7)
+!14 = !DILocation(line: 20, column: 5, scope: !13)
+!15 = !DILocation(line: 21, column: 15, scope: !13)
+!16 = !DILocation(line: 22, column: 11, scope: !13)
+!18 = !DILocation(line: 23, column: 14, scope: !13)
+!19 = !DILocation(line: 25, column: 14, scope: !13)
+!20 = !DILocation(line: 20, column: 28, scope: !13)
+!22 = !DILocation(line: 19, column: 26, scope: !13)
+!24 = !DILocation(line: 27, column: 3, scope: !13)
+!25 = !DILocation(line: 28, column: 3, scope: !13)
+!26 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !4)

Added: llvm/trunk/test/Transforms/SampleProfile/function_metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/function_metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/function_metadata.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/function_metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,59 @@
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/function_metadata.prof -S | FileCheck %s
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/function_metadata.compact.afdo -S | FileCheck %s
+
+; Tests whether the functions in the inline stack are added to the
+; function_entry_count metadata.
+
+declare void @foo()
+
+define void @foo_available() !dbg !11 {
+  ret void
+}
+
+; CHECK: define void @test({{.*}} !prof ![[ENTRY_TEST:[0-9]+]]
+define void @test(void ()*) !dbg !7 {
+  %2 = alloca void ()*
+  store void ()* %0, void ()** %2
+  %3 = load void ()*, void ()** %2
+  ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]]
+  call void @foo(), !dbg !18
+  call void %3(), !dbg !19
+  ret void
+}
+
+; CHECK: define void @test_liveness({{.*}} !prof ![[ENTRY_TEST_LIVENESS:[0-9]+]]
+define void @test_liveness() !dbg !12 {
+  call void @foo(), !dbg !20
+  ret void
+}
+
+; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to
+; make sure hot inline stacks are imported. The total count of baz is lower
+; than the hot cutoff threshold and its GUID should not be included in the
+; metadata.
+; CHECK: ![[ENTRY_TEST]] = !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905,  i64 -7908226060800700466, i64 -2012135647395072713}
+
+; Check GUIDs for both foo and foo_available are included in the metadata to
+; make sure the liveness analysis can capture the dependency from test_liveness
+; to foo_available.
+; CHECK: ![[ENTRY_TEST_LIVENESS]] = !{!"function_entry_count", i64 1, i64 4005816710939881937, i64 6699318081062747564}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = distinct !DISubprogram(name: "foo_available", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, retainedNodes: !2)
+!12 = distinct !DISubprogram(name: "test_liveness", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, retainedNodes: !2)
+!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !17)
+!19 = !DILocation(line: 11, scope: !17)
+!20 = !DILocation(line: 8, scope: !12)

Added: llvm/trunk/test/Transforms/SampleProfile/gcc-simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/gcc-simple.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/gcc-simple.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/gcc-simple.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,218 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/gcc-simple.afdo -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/gcc-simple.afdo -S | FileCheck %s
+; XFAIL: powerpc-, powerpc64-, s390x, mips-, mips64-, sparc
+; Original code:
+;
+; #include <stdlib.h>
+;
+; long long int foo(long i) {
+;   if (rand() < 500) return 2; else if (rand() > 5000) return 10; else return 90;
+; }
+;
+; int main() {
+;   long long int sum = 0;
+;   for (int k = 0; k < 3000; k++)
+;     for (int i = 0; i < 200000; i++) sum += foo(i);
+;   return sum > 0 ? 0 : 1;
+; }
+;
+; This test was compiled down to bytecode at -O0 to avoid inlining foo() into
+; main(). The profile was generated using a GCC-generated binary (also compiled
+; at -O0). The conversion from the Linux Perf profile to the GCC autofdo
+; profile used the converter at https://github.com/google/autofdo
+;
+; $ gcc -g -O0 gcc-simple.cc -o gcc-simple
+; $ perf record -b ./gcc-simple
+; $ create_gcov --binary=gcc-simple --gcov=gcc-simple.afdo
+
+define i64 @_Z3fool(i64 %i) #0 !dbg !4 {
+; CHECK: !prof ![[EC1:[0-9]+]]
+entry:
+  %retval = alloca i64, align 8
+  %i.addr = alloca i64, align 8
+  store i64 %i, i64* %i.addr, align 8
+  call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !16, metadata !17), !dbg !18
+  %call = call i32 @rand() #3, !dbg !19
+  %cmp = icmp slt i32 %call, 500, !dbg !21
+  br i1 %cmp, label %if.then, label %if.else, !dbg !22
+; CHECK: !prof ![[PROF1:[0-9]+]]
+
+if.then:                                          ; preds = %entry
+  store i64 2, i64* %retval, align 8, !dbg !23
+  br label %return, !dbg !23
+
+if.else:                                          ; preds = %entry
+  %call1 = call i32 @rand() #3, !dbg !25
+  %cmp2 = icmp sgt i32 %call1, 5000, !dbg !28
+  br i1 %cmp2, label %if.then.3, label %if.else.4, !dbg !29
+; CHECK: !prof ![[PROF2:[0-9]+]]
+
+if.then.3:                                        ; preds = %if.else
+  store i64 10, i64* %retval, align 8, !dbg !30
+  br label %return, !dbg !30
+
+if.else.4:                                        ; preds = %if.else
+  store i64 90, i64* %retval, align 8, !dbg !32
+  br label %return, !dbg !32
+
+return:                                           ; preds = %if.else.4, %if.then.3, %if.then
+  %0 = load i64, i64* %retval, align 8, !dbg !34
+  ret i64 %0, !dbg !34
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind
+declare i32 @rand() #2
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !dbg !9 {
+; CHECK: !prof ![[EC2:[0-9]+]]
+entry:
+  %retval = alloca i32, align 4
+  %sum = alloca i64, align 8
+  %k = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  call void @llvm.dbg.declare(metadata i64* %sum, metadata !35, metadata !17), !dbg !36
+  store i64 0, i64* %sum, align 8, !dbg !36
+  call void @llvm.dbg.declare(metadata i32* %k, metadata !37, metadata !17), !dbg !39
+  store i32 0, i32* %k, align 4, !dbg !39
+  br label %for.cond, !dbg !40
+
+for.cond:                                         ; preds = %for.inc.4, %entry
+  %0 = load i32, i32* %k, align 4, !dbg !41
+  %cmp = icmp slt i32 %0, 3000, !dbg !45
+  br i1 %cmp, label %for.body, label %for.end.6, !dbg !46
+; CHECK: !prof ![[PROF3:[0-9]+]]
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !47, metadata !17), !dbg !49
+  store i32 0, i32* %i, align 4, !dbg !49
+  br label %for.cond.1, !dbg !50
+
+for.cond.1:                                       ; preds = %for.inc, %for.body
+  %1 = load i32, i32* %i, align 4, !dbg !51
+  %cmp2 = icmp slt i32 %1, 200000, !dbg !55
+  br i1 %cmp2, label %for.body.3, label %for.end, !dbg !56
+; CHECK: !prof ![[PROF4:[0-9]+]]
+
+for.body.3:                                       ; preds = %for.cond.1
+  %2 = load i32, i32* %i, align 4, !dbg !57
+  %conv = sext i32 %2 to i64, !dbg !57
+  %call = call i64 @_Z3fool(i64 %conv), !dbg !59
+  %3 = load i64, i64* %sum, align 8, !dbg !60
+  %add = add nsw i64 %3, %call, !dbg !60
+  store i64 %add, i64* %sum, align 8, !dbg !60
+  br label %for.inc, !dbg !61
+
+for.inc:                                          ; preds = %for.body.3
+  %4 = load i32, i32* %i, align 4, !dbg !62
+  %inc = add nsw i32 %4, 1, !dbg !62
+  store i32 %inc, i32* %i, align 4, !dbg !62
+  br label %for.cond.1, !dbg !64
+
+for.end:                                          ; preds = %for.cond.1
+  br label %for.inc.4, !dbg !65
+
+for.inc.4:                                        ; preds = %for.end
+  %5 = load i32, i32* %k, align 4, !dbg !67
+  %inc5 = add nsw i32 %5, 1, !dbg !67
+  store i32 %inc5, i32* %k, align 4, !dbg !67
+  br label %for.cond, !dbg !68
+
+for.end.6:                                        ; preds = %for.cond
+  %6 = load i64, i64* %sum, align 8, !dbg !69
+  %cmp7 = icmp sgt i64 %6, 0, !dbg !70
+  %cond = select i1 %cmp7, i32 0, i32 1, !dbg !69
+  ret i32 %cond, !dbg !71
+}
+
+; CHECK ![[EC1]] = !{!"function_entry_count", i64 24108}
+; CHECK ![[PROF1]] = !{!"branch_weights", i32 1, i32 30124}
+; CHECK ![[PROF2]] = !{!"branch_weights", i32 30177, i32 29579}
+; CHECK ![[EC2]] = !{!"function_entry_count", i64 0}
+; CHECK ![[PROF3]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK ![[PROF4]] = !{!"branch_weights", i32 1, i32 20238}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "discriminator.cc", directory: "/usr/local/google/home/dnovillo/llvm/test/autofdo")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !8}
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!12}
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)"}
+!16 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !8)
+!17 = !DIExpression()
+!18 = !DILocation(line: 3, column: 24, scope: !4)
+!19 = !DILocation(line: 4, column: 7, scope: !20)
+!20 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 7)
+!21 = !DILocation(line: 4, column: 14, scope: !20)
+!22 = !DILocation(line: 4, column: 7, scope: !4)
+!23 = !DILocation(line: 4, column: 21, scope: !24)
+!24 = !DILexicalBlockFile(scope: !20, file: !1, discriminator: 1)
+!25 = !DILocation(line: 4, column: 40, scope: !26)
+!26 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 2)
+!27 = distinct !DILexicalBlock(scope: !20, file: !1, line: 4, column: 40)
+!28 = !DILocation(line: 4, column: 47, scope: !27)
+!29 = !DILocation(line: 4, column: 40, scope: !20)
+!30 = !DILocation(line: 4, column: 55, scope: !31)
+!31 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 3)
+!32 = !DILocation(line: 4, column: 71, scope: !33)
+!33 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 4)
+!34 = !DILocation(line: 5, column: 1, scope: !4)
+!35 = !DILocalVariable(name: "sum", scope: !9, file: !1, line: 8, type: !7)
+!36 = !DILocation(line: 8, column: 17, scope: !9)
+!37 = !DILocalVariable(name: "k", scope: !38, file: !1, line: 9, type: !12)
+!38 = distinct !DILexicalBlock(scope: !9, file: !1, line: 9, column: 3)
+!39 = !DILocation(line: 9, column: 12, scope: !38)
+!40 = !DILocation(line: 9, column: 8, scope: !38)
+!41 = !DILocation(line: 9, column: 19, scope: !42)
+!42 = !DILexicalBlockFile(scope: !43, file: !1, discriminator: 2)
+!43 = !DILexicalBlockFile(scope: !44, file: !1, discriminator: 1)
+!44 = distinct !DILexicalBlock(scope: !38, file: !1, line: 9, column: 3)
+!45 = !DILocation(line: 9, column: 21, scope: !44)
+!46 = !DILocation(line: 9, column: 3, scope: !38)
+!47 = !DILocalVariable(name: "i", scope: !48, file: !1, line: 10, type: !12)
+!48 = distinct !DILexicalBlock(scope: !44, file: !1, line: 10, column: 5)
+!49 = !DILocation(line: 10, column: 14, scope: !48)
+!50 = !DILocation(line: 10, column: 10, scope: !48)
+!51 = !DILocation(line: 10, column: 21, scope: !52)
+!52 = !DILexicalBlockFile(scope: !53, file: !1, discriminator: 5)
+!53 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 1)
+!54 = distinct !DILexicalBlock(scope: !48, file: !1, line: 10, column: 5)
+!55 = !DILocation(line: 10, column: 23, scope: !54)
+!56 = !DILocation(line: 10, column: 5, scope: !48)
+!57 = !DILocation(line: 10, column: 49, scope: !58)
+!58 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 2)
+!59 = !DILocation(line: 10, column: 45, scope: !54)
+!60 = !DILocation(line: 10, column: 42, scope: !54)
+!61 = !DILocation(line: 10, column: 38, scope: !54)
+!62 = !DILocation(line: 10, column: 34, scope: !63)
+!63 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 4)
+!64 = !DILocation(line: 10, column: 5, scope: !54)
+!65 = !DILocation(line: 10, column: 50, scope: !66)
+!66 = !DILexicalBlockFile(scope: !48, file: !1, discriminator: 3)
+!67 = !DILocation(line: 9, column: 30, scope: !44)
+!68 = !DILocation(line: 9, column: 3, scope: !44)
+!69 = !DILocation(line: 11, column: 10, scope: !9)
+!70 = !DILocation(line: 11, column: 14, scope: !9)
+!71 = !DILocation(line: 11, column: 3, scope: !9)

Added: llvm/trunk/test/Transforms/SampleProfile/indirect-call-gcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/indirect-call-gcc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/indirect-call-gcc.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/indirect-call-gcc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.afdo -S | FileCheck %s
+
+; Checks if indirect call targets are read correctly when reading from gcc
+; format profile.
+; It is expected to fail on certain architectures as gcc profile reader does
+; not work.
+; XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
+
+define void @test(void ()*) !dbg !3 {
+  %2 = alloca void ()*
+  store void ()* %0, void ()** %2
+  %3 = load void ()*, void ()** %2
+  ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]]
+  call void %3(), !dbg !4
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1)
+!1 = !DIFile(filename: "test.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !0)
+!4 = !DILocation(line: 5, scope: !3)
+; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398}

Added: llvm/trunk/test/Transforms/SampleProfile/indirect-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/indirect-call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/indirect-call.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/indirect-call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,213 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.prof -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.compact.afdo -S | FileCheck %s
+
+; CHECK-LABEL: @test
+define void @test(void ()*) !dbg !3 {
+  %2 = alloca void ()*
+  store void ()* %0, void ()** %2
+  %3 = load void ()*, void ()** %2
+  ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]]
+  call void %3(), !dbg !4
+  ret void
+}
+
+; CHECK-LABEL: @test_inline
+; If the indirect call is promoted and inlined in profile, we should promote and inline it.
+define void @test_inline(i64* (i32*)*, i32* %x) !dbg !6 {
+  %2 = alloca i64* (i32*)*
+  store i64* (i32*)* %0, i64* (i32*)** %2
+  %3 = load i64* (i32*)*, i64* (i32*)** %2
+; CHECK: icmp {{.*}} @foo_inline2
+; CHECK: br {{.*}} !prof ![[BR1:[0-9]+]]
+; CHECK: if.true.direct_targ:
+; CHECK-NOT: call
+; CHECK: if.false.orig_indirect:
+; CHECK: icmp {{.*}} @foo_inline1
+; CHECK: br {{.*}} !prof ![[BR2:[0-9]+]]
+; CHECK: if.true.direct_targ1:
+; CHECK-NOT: call
+; CHECK: if.false.orig_indirect2:
+; CHECK: call {{.*}} !prof ![[VP:[0-9]+]]
+  call i64* %3(i32* %x), !dbg !7
+  ret void
+}
+
+; CHECK-LABEL: @test_inline_strip
+; If the indirect call is promoted and inlined in profile, and the callee name
+; is stripped, we should promote and inline it.
+define void @test_inline_strip(i64* (i32*)*, i32* %x) !dbg !8 {
+  %2 = alloca i64* (i32*)*
+  store i64* (i32*)* %0, i64* (i32*)** %2
+  %3 = load i64* (i32*)*, i64* (i32*)** %2
+; CHECK: icmp {{.*}} @foo_inline_strip.suffix
+; CHECK: if.true.direct_targ:
+; CHECK-NOT: call
+; CHECK: if.false.orig_indirect:
+; CHECK: call
+  call i64* %3(i32* %x), !dbg !9
+  ret void
+}
+
+; CHECK-LABEL: @test_inline_strip_conflict
+; If the indirect call is promoted and inlined in profile, and the callee name
+; is stripped but has more than one potential match, we should not promote.
+define void @test_inline_strip_conflict(i64* (i32*)*, i32* %x) !dbg !10 {
+  %2 = alloca i64* (i32*)*
+  store i64* (i32*)* %0, i64* (i32*)** %2
+  %3 = load i64* (i32*)*, i64* (i32*)** %2
+; CHECK-NOT: if.true.direct_targ:
+  call i64* %3(i32* %x), !dbg !11
+  ret void
+}
+
+; CHECK-LABEL: @test_noinline
+; If the indirect call target is not available, we should not promote it.
+define void @test_noinline(void ()*) !dbg !12 {
+  %2 = alloca void ()*
+  store void ()* %0, void ()** %2
+  %3 = load void ()*, void ()** %2
+; CHECK-NOT: icmp
+; CHECK: call
+  call void %3(), !dbg !13
+  ret void
+}
+
+; CHECK-LABEL: @test_noinline_bitcast
+; If the indirect call has been promoted to a direct call with bitcast,
+; do not inline it.
+define float @test_noinline_bitcast(float ()*) !dbg !26 {
+  %2 = alloca float ()*
+  store float ()* %0, float ()** %2
+; CHECK: icmp
+; CHECK: call
+  %3 = load float ()*, float ()** %2
+  %4 = call float %3(), !dbg !27
+  ret float %4
+}
+
+; CHECK-LABEL: @test_norecursive_inline
+; If the indirect call target is the caller, we should not promote it.
+define void @test_norecursive_inline() !dbg !24 {
+; CHECK-NOT: icmp
+; CHECK: call
+  %1 = load void ()*, void ()** @y, align 8
+  call void %1(), !dbg !25
+  ret void
+}
+
+define i32* @return_arg(i32* readnone returned) !dbg !29{
+  ret i32* %0
+}
+
+; CHECK-LABEL: @return_arg_caller
+; When the promoted indirect call returns a parameter that was defined by the
+; return value of a previous direct call, check that both the direct call and
+; the promoted indirect call are inlined.
+define i32* @return_arg_caller(i32* (i32*)* nocapture) !dbg !30{
+; CHECK-NOT: call i32* @foo_inline1
+; CHECK: if.true.direct_targ:
+; CHECK-NOT: call
+; CHECK: if.false.orig_indirect:
+; CHECK: call
+  %2 = call i32* @foo_inline1(i32* null), !dbg !31
+  %cmp = icmp ne i32* %2, null
+  br i1 %cmp, label %then, label %else
+
+then:
+  %3 = tail call i32* %0(i32* %2), !dbg !32
+  ret i32* %3
+
+else:
+  ret i32* null
+}
+
+@x = global i32 0, align 4
+@y = global void ()* null, align 8
+
+define i32* @foo_inline1(i32* %x) !dbg !14 {
+  ret i32* %x
+}
+
+define i32* @foo_inline_strip.suffix(i32* %x) !dbg !15 {
+  ret i32* %x
+}
+
+define i32* @foo_inline_strip_conflict.suffix1(i32* %x) !dbg !16 {
+  ret i32* %x
+}
+
+define i32* @foo_inline_strip_conflict.suffix2(i32* %x) !dbg !17 {
+  ret i32* %x
+}
+
+define i32* @foo_inline_strip_conflict.suffix3(i32* %x) !dbg !18 {
+  ret i32* %x
+}
+
+define i32* @foo_inline2(i32* %x) !dbg !19 {
+  ret i32* %x
+}
+
+define i32 @foo_noinline(i32 %x) !dbg !20 {
+  ret i32 %x
+}
+
+define void @foo_direct() !dbg !21 {
+  ret void
+}
+
+define i32 @foo_direct_i32() !dbg !28 {
+  ret i32 0;
+}
+
+; CHECK-LABEL: @test_direct
+; We should not promote a direct call.
+define void @test_direct() !dbg !22 {
+; CHECK-NOT: icmp
+; CHECK: call
+  call void @foo_alias(), !dbg !23
+  ret void
+}
+
+@foo_alias = alias void (), void ()* @foo_direct
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1)
+!1 = !DIFile(filename: "test.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3, unit: !0)
+!4 = !DILocation(line: 4, scope: !3)
+!5 = !DILocation(line: 6, scope: !3)
+; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398}
+; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000}
+; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000}
+; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000}
+!6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0)
+!7 = !DILocation(line: 7, scope: !6)
+!8 = distinct !DISubprogram(name: "test_inline_strip", scope: !1, file: !1, line: 8, unit: !0)
+!9 = !DILocation(line: 9, scope: !8)
+!10 = distinct !DISubprogram(name: "test_inline_strip_conflict", scope: !1, file: !1, line: 10, unit: !0)
+!11 = !DILocation(line: 11, scope: !10)
+!12 = distinct !DISubprogram(name: "test_noinline", scope: !1, file: !1, line: 12, unit: !0)
+!13 = !DILocation(line: 13, scope: !12)
+!14 = distinct !DISubprogram(name: "foo_inline1", scope: !1, file: !1, line: 11, unit: !0)
+!15 = distinct !DISubprogram(name: "foo_inline_strip.suffix", scope: !1, file: !1, line: 1, unit: !0)
+!16 = distinct !DISubprogram(name: "foo_inline_strip_conflict.suffix1", scope: !1, file: !1, line: 1, unit: !0)
+!17 = distinct !DISubprogram(name: "foo_inline_strip_conflict.suffix2", scope: !1, file: !1, line: 1, unit: !0)
+!18 = distinct !DISubprogram(name: "foo_inline_strip_conflict.suffix3", scope: !1, file: !1, line: 1, unit: !0)
+!19 = distinct !DISubprogram(name: "foo_inline2", scope: !1, file: !1, line: 19, unit: !0)
+!20 = distinct !DISubprogram(name: "foo_noinline", scope: !1, file: !1, line: 20, unit: !0)
+!21 = distinct !DISubprogram(name: "foo_direct", scope: !1, file: !1, line: 21, unit: !0)
+!22 = distinct !DISubprogram(name: "test_direct", scope: !1, file: !1, line: 22, unit: !0)
+!23 = !DILocation(line: 23, scope: !22)
+!24 = distinct !DISubprogram(name: "test_norecursive_inline", scope: !1, file: !1, line: 12, unit: !0)
+!25 = !DILocation(line: 13, scope: !24)
+!26 = distinct !DISubprogram(name: "test_noinline_bitcast", scope: !1, file: !1, line: 12, unit: !0)
+!27 = !DILocation(line: 13, scope: !26)
+!28 = distinct !DISubprogram(name: "foo_direct_i32", scope: !1, file: !1, line: 11, unit: !0)
+!29 = distinct !DISubprogram(name: "return_arg", scope: !1, file: !1, line: 11, unit: !0)
+!30 = distinct !DISubprogram(name: "return_arg_caller", scope: !1, file: !1, line: 11, unit: !0)
+!31 = !DILocation(line: 12, scope: !30)
+!32 = !DILocation(line: 13, scope: !30)

Added: llvm/trunk/test/Transforms/SampleProfile/inline-act.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/inline-act.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/inline-act.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/inline-act.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,72 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-act.prof
+
+; Sample profile should have non-empty ACT passed to inliner
+
+; int t;
+; bool foo(int value) {
+;   switch(value) {
+;     case 0:
+;     case 1:
+;     case 3:
+;       return true;
+;     default:
+;       return false;
+;   }
+; }
+; void bar(int i) {
+;   if (foo(i))
+;     t *= 2;
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@t = global i32 0, align 4 ; the global 'int t' from the C++ above
+
+; Function Attrs: nounwind uwtable
+define zeroext i1 @_Z3fooi(i32) #0 { ; foo(value) from the C++ above, with the switch lowered to a bitmask lookup
+  %switch.tableidx = sub i32 %0, 0
+  %2 = icmp ult i32 %switch.tableidx, 4 ; only 0..3 take the lookup path; everything else returns false
+  br i1 %2, label %switch.lookup, label %3
+
+switch.lookup:                                    ; preds = %1
+  %switch.cast = trunc i32 %switch.tableidx to i4
+  %switch.shiftamt = mul i4 %switch.cast, 1
+  %switch.downshift = lshr i4 -5, %switch.shiftamt ; i4 -5 is 0b1011: bits 0, 1 and 3 are set, i.e. cases 0, 1 and 3
+  %switch.masked = trunc i4 %switch.downshift to i1 ; keep the selected bit as the result
+  ret i1 %switch.masked
+
+; <label>:3:                                      ; preds = %1
+  ret i1 false
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z3bari(i32) #0 !dbg !9 { ; bar(i) from the C++ above
+  %2 = call zeroext i1 @_Z3fooi(i32 %0), !dbg !10 ; the only call instruction in this test
+  br i1 %2, label %3, label %6, !dbg !10
+
+; <label>:3:                                      ; preds = %1
+  %4 = load i32, i32* @t, align 4
+  %5 = shl nsw i32 %4, 1 ; t *= 2 written as a left shift by one
+  store i32 %5, i32* @t, align 4
+  br label %6
+
+; <label>:6:                                      ; preds = %3, %1
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 272227) (llvm/trunk 272226)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "test.cc", directory: "./")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang version 3.9.0 (trunk 272227) (llvm/trunk 272226)"}
+!6 = !DISubroutineType(types: !2)
+!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 14, type: !6, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
+!10 = !DILocation(line: 15, column: 7, scope: !9)
+!11 = !DILocation(line: 16, column: 7, scope: !9)

Added: llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
+; heuristics should be honored if the caller has no profile.
+
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s --check-prefix ACCURATE
+
+declare void @extern()
+define void @callee() { ; small body that only forwards to @extern
+  call void @extern()
+  ret void
+}
+
+define void @caller(i32 %y1) { ; caller with no attribute group attached
+; CHECK-LABEL: @caller
+; CHECK-NOT: call void @callee
+; ACCURATE-LABEL: @caller
+; ACCURATE: call void @callee
+  call void @callee() ; the call site the expectations above are about
+  ret void
+}
+
+define void @caller_accurate(i32 %y1) #0 { ; same body as @caller, but carries attribute group #0
+; CHECK-LABEL: @caller_accurate
+; CHECK: call void @callee
+; ACCURATE-LABEL: @caller_accurate
+; ACCURATE: call void @callee
+  call void @callee() ; expected to remain a call under both prefixes
+  ret void
+}
+
+attributes #0 = { "profile-sample-accurate" }

Added: llvm/trunk/test/Transforms/SampleProfile/inline-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/inline-combine.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/inline-combine.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/inline-combine.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/inline-combine.prof -S | FileCheck %s
+; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/inline-combine.prof -S | FileCheck %s
+
+%"class.llvm::FoldingSetNodeID" = type { %"class.llvm::SmallVector" }
+%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl.base", %"struct.llvm::SmallVectorStorage" }
+%"class.llvm::SmallVectorImpl.base" = type { %"class.llvm::SmallVectorTemplateBase.base" }
+%"class.llvm::SmallVectorTemplateBase.base" = type { %"class.llvm::SmallVectorTemplateCommon.base" }
+%"class.llvm::SmallVectorTemplateCommon.base" = type <{ %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion" }>
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8* }
+%"struct.llvm::AlignedCharArrayUnion" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::AlignedCharArray" = type { [4 x i8] }
+%"struct.llvm::SmallVectorStorage" = type { [31 x %"struct.llvm::AlignedCharArrayUnion"] }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase.base", [4 x i8] }
+
+$foo = comdat any
+
+$bar = comdat any
+
+define void @foo(%"class.llvm::FoldingSetNodeID"* %this) comdat align 2 !dbg !3 {
+  %1 = alloca %"class.llvm::FoldingSetNodeID"*, align 8
+  store %"class.llvm::FoldingSetNodeID"* %this, %"class.llvm::FoldingSetNodeID"** %1, align 8
+  %2 = load %"class.llvm::FoldingSetNodeID"*, %"class.llvm::FoldingSetNodeID"** %1, align 8
+  %3 = getelementptr inbounds %"class.llvm::FoldingSetNodeID", %"class.llvm::FoldingSetNodeID"* %2, i32 0, i32 0 ; address of the first field, the SmallVector member
+; The call below should have been inlined after the sample-profile pass.
+; CHECK-NOT: call
+  call void bitcast (void (%"class.llvm::SmallVectorImpl"*)* @bar to void (%"class.llvm::SmallVector"*)*)(%"class.llvm::SmallVector"* %3), !dbg !7 ; @bar is reached through a bitcast because it is declared to take SmallVectorImpl*
+  ret void
+}
+
+define void @bar(%"class.llvm::SmallVectorImpl"* %this) comdat align 2 !dbg !8 { ; empty callee; its parameter type differs from the pointer type passed in @foo
+  ret void
+}
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+!llvm.dbg.cu = !{!9}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 1, !"Debug Info Version", i32 3}
+!2 = !{!"clang version 3.5 "}
+!3 = distinct !DISubprogram(name: "foo", scope: !4, file: !4, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !9, retainedNodes: !6)
+!4 = !DIFile(filename: "test.cc", directory: ".")
+!5 = !DISubroutineType(types: !6)
+!6 = !{}
+!7 = !DILocation(line: 4, scope: !3)
+!8 = distinct !DISubprogram(name: "bar", scope: !4, file: !4, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !9, retainedNodes: !6)
+!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !4)

Added: llvm/trunk/test/Transforms/SampleProfile/inline-coverage.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/inline-coverage.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/inline-coverage.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/inline-coverage.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,135 @@
+; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/inline-coverage.prof -sample-profile-check-record-coverage=100 -sample-profile-check-sample-coverage=110 -pass-remarks=sample-profile -pass-remarks-analysis=sample-profile -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/inline-coverage.prof -sample-profile-check-record-coverage=100 -sample-profile-check-sample-coverage=110 -pass-remarks=sample-profile -pass-remarks-analysis=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; Original code:
+;
+;     1    #include <stdlib.h>
+;     2
+;     3    long long int foo(long i) {
+;     4      return rand() * i;
+;     5    }
+;     6
+;     7    int main() {
+;     8      long long int sum = 0;
+;     9      for (int i = 0; i < 200000 * 3000; i++)
+;    10        sum += foo(i);
+;    11      return sum > 0 ? 0 : 1;
+;    12    }
+;
+; CHECK: remark: coverage.cc:10:12: inlined hot callee '_Z3fool' into 'main'
+; CHECK: remark: coverage.cc:9:21: Applied 23478 samples from profile (offset: 2.1)
+; CHECK: remark: coverage.cc:10:16: Applied 23478 samples from profile (offset: 3)
+; CHECK: remark: coverage.cc:4:10: Applied 31878 samples from profile (offset: 1)
+; CHECK: remark: coverage.cc:11:10: Applied 0 samples from profile (offset: 4)
+; CHECK: remark: coverage.cc:10:16: most popular destination for conditional branches at coverage.cc:9:3
+;
+; There is one sample record with 0 samples at offset 4 in main() that we never
+; use:
+; CHECK: warning: coverage.cc:7: 4 of 5 available profile records (80%) were applied
+;
+; Since the unused sample record contributes no samples, sample coverage should
+; be 100%. Note that we get this warning because we are requesting an impossible
+; 110% coverage check.
+; CHECK: warning: coverage.cc:7: 78834 of 78834 available profile samples (100%) were applied
+
+define i64 @_Z3fool(i64 %i) !dbg !4 { ; foo(long i) from the listing above
+entry:
+  %i.addr = alloca i64, align 8
+  store i64 %i, i64* %i.addr, align 8
+  call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !16, metadata !17), !dbg !18
+  %call = call i32 @rand(), !dbg !19
+  %conv = sext i32 %call to i64, !dbg !19 ; widen the i32 result of rand() before the multiply
+  %0 = load i64, i64* %i.addr, align 8, !dbg !20
+  %mul = mul nsw i64 %conv, %0, !dbg !21 ; rand() * i
+  ret i64 %mul, !dbg !22
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+declare i32 @rand()
+
+define i32 @main() !dbg !9 { ; main() from the listing above
+entry:
+  %retval = alloca i32, align 4
+  %sum = alloca i64, align 8
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  call void @llvm.dbg.declare(metadata i64* %sum, metadata !23, metadata !17), !dbg !24
+  store i64 0, i64* %sum, align 8, !dbg !24
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !25, metadata !17), !dbg !27
+  store i32 0, i32* %i, align 4, !dbg !27
+  br label %for.cond, !dbg !28
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4, !dbg !29
+  %cmp = icmp slt i32 %0, 600000000, !dbg !32 ; 600000000 == 200000 * 3000, the source loop bound
+  br i1 %cmp, label %for.body, label %for.end, !dbg !33
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, i32* %i, align 4, !dbg !34
+  %conv = sext i32 %1 to i64, !dbg !34
+  %call = call i64 @_Z3fool(i64 %conv), !dbg !35 ; !35 is 10:12, the location of the expected inlining remark
+  %2 = load i64, i64* %sum, align 8, !dbg !36
+  %add = add nsw i64 %2, %call, !dbg !36 ; sum += foo(i)
+  store i64 %add, i64* %sum, align 8, !dbg !36
+  br label %for.inc, !dbg !37
+
+for.inc:                                          ; preds = %for.body
+  %3 = load i32, i32* %i, align 4, !dbg !38
+  %inc = add nsw i32 %3, 1, !dbg !38
+  store i32 %inc, i32* %i, align 4, !dbg !38
+  br label %for.cond, !dbg !39
+
+for.end:                                          ; preds = %for.cond
+  %4 = load i64, i64* %sum, align 8, !dbg !40
+  %cmp1 = icmp sgt i64 %4, 0, !dbg !41
+  %cond = select i1 %cmp1, i32 0, i32 1, !dbg !40 ; sum > 0 ? 0 : 1
+  ret i32 %cond, !dbg !42
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "coverage.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !8}
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!12}
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)"}
+!16 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !8)
+!17 = !DIExpression()
+!18 = !DILocation(line: 3, column: 24, scope: !4)
+!19 = !DILocation(line: 4, column: 10, scope: !4)
+!20 = !DILocation(line: 4, column: 19, scope: !4)
+!21 = !DILocation(line: 4, column: 17, scope: !4)
+!22 = !DILocation(line: 4, column: 3, scope: !4)
+!23 = !DILocalVariable(name: "sum", scope: !9, file: !1, line: 8, type: !7)
+!24 = !DILocation(line: 8, column: 17, scope: !9)
+!25 = !DILocalVariable(name: "i", scope: !26, file: !1, line: 9, type: !12)
+!26 = distinct !DILexicalBlock(scope: !9, file: !1, line: 9, column: 3)
+!27 = !DILocation(line: 9, column: 12, scope: !26)
+!28 = !DILocation(line: 9, column: 8, scope: !26)
+!29 = !DILocation(line: 9, column: 19, scope: !30)
+!30 = !DILexicalBlockFile(scope: !31, file: !1, discriminator: 2)
+!31 = distinct !DILexicalBlock(scope: !26, file: !1, line: 9, column: 3)
+!32 = !DILocation(line: 9, column: 21, scope: !30)
+!33 = !DILocation(line: 9, column: 3, scope: !30)
+!34 = !DILocation(line: 10, column: 16, scope: !31)
+!35 = !DILocation(line: 10, column: 12, scope: !31)
+!36 = !DILocation(line: 10, column: 9, scope: !31)
+!37 = !DILocation(line: 10, column: 5, scope: !31)
+!38 = !DILocation(line: 9, column: 39, scope: !31)
+!39 = !DILocation(line: 9, column: 3, scope: !31)
+!40 = !DILocation(line: 11, column: 10, scope: !9)
+!41 = !DILocation(line: 11, column: 14, scope: !9)
+!42 = !DILocation(line: 11, column: 3, scope: !9)

Added: llvm/trunk/test/Transforms/SampleProfile/inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/inline.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/inline.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/inline.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,109 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s
+
+; Original C++ test case
+;
+; #include <stdio.h>
+;
+; int sum(int x, int y) {
+;   return x + y;
+; }
+;
+; int main() {
+;   int s, i = 0;
+;   while (i++ < 20000 * 20000)
+;     if (i != 100) s = sum(i, s); else s = 30;
+;   printf("sum is %d\n", s);
+;   return 0;
+; }
+;
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 ; printf format string used by @main
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { ; sum(x, y) from the C++ above
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %1 = load i32, i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %0, %1, !dbg !11 ; x + y
+  ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() !dbg !7 { ; main() from the C++ above
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !12
+  br label %while.cond, !dbg !13
+
+while.cond:                                       ; preds = %if.end, %entry
+  %0 = load i32, i32* %i, align 4, !dbg !14
+  %inc = add nsw i32 %0, 1, !dbg !14
+  store i32 %inc, i32* %i, align 4, !dbg !14
+  %cmp = icmp slt i32 %0, 400000000, !dbg !14 ; compares the pre-increment value; 400000000 == 20000 * 20000
+  br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body:                                       ; preds = %while.cond
+  %1 = load i32, i32* %i, align 4, !dbg !16
+  %cmp1 = icmp ne i32 %1, 100, !dbg !16 ; i != 100
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+
+if.then:                                          ; preds = %while.body
+  %2 = load i32, i32* %i, align 4, !dbg !18
+  %3 = load i32, i32* %s, align 4, !dbg !18
+  %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18 ; s = sum(i, s); the directive on the next line expects this call to be gone
+; CHECK-NOT: call i32 @_Z3sumii
+  store i32 %call, i32* %s, align 4, !dbg !18
+  br label %if.end, !dbg !18
+
+if.else:                                          ; preds = %while.body
+  store i32 30, i32* %s, align 4, !dbg !20 ; s = 30
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !22
+
+while.end:                                        ; preds = %while.cond
+  %4 = load i32, i32* %s, align 4, !dbg !24
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)

Added: llvm/trunk/test/Transforms/SampleProfile/nodebug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/nodebug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/nodebug.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/nodebug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/nodebug.prof
+
+define void @foo() !dbg !3 { ; has a DISubprogram attached (!3, named "a")
+  call void @bar(), !dbg !4 ; call with a debug location into a function that has no debug info
+  ret void
+}
+
+define void @bar() { ; no !dbg attachment on the function or its instructions
+  call void @bar() ; calls itself
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1)
+!1 = !DIFile(filename: "t", directory: "/tmp/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "a", scope: !1, file: !1, line: 10, unit: !0)
+!4 = !DILocation(line: 10, scope: !3)

Added: llvm/trunk/test/Transforms/SampleProfile/nolocinfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/nolocinfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/nolocinfo.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/nolocinfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
+
+define i32 @foo(i32 %i)  !dbg !4 {
+entry:
+  %i.addr = alloca i32, align 4
+  %0 = load i32, i32* %i.addr, align 4 ; %i is never stored to %i.addr, so this reads an uninitialized slot
+  %cmp = icmp sgt i32 %0, 1000
+
+; Remarks for conditional branches need debug location information for the
+; referring branch. When that is not present, the compiler should not abort.
+;
+; CHECK: remark: nolocinfo.c:3:5: most popular destination for conditional branches at <UNKNOWN LOCATION>
+  br i1 %cmp, label %if.then, label %if.end ; this branch has no !dbg attachment, which is the case under test
+
+if.then:
+  ret i32 0, !dbg !18 ; !18 is line 3, column 5: the location named in the expected remark
+
+if.end:
+  ret i32 1
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "nolocinfo.c", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)"}
+!15 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 7)
+!18 = !DILocation(line: 3, column: 5, scope: !15)

Added: llvm/trunk/test/Transforms/SampleProfile/offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/offset.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/offset.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/offset.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,82 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ code for this test case:
+;
+; a.cc:
+; #1
+; #2
+; #3
+; #4
+; #5 int foo(int a) {
+; #6 #include "a.b"
+; #7}
+;
+; a.b:
+; #1 if (a > 0) {
+; #2   return 10;
+; #3 } else {
+; #4   return 20;
+; #5 }
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3fooi(i32 %a) #0 !dbg !4 { ; foo(int a) from a.cc; the if/else body comes from the included a.b
+entry:
+  %retval = alloca i32, align 4
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !11, metadata !12), !dbg !13
+  %0 = load i32, i32* %a.addr, align 4, !dbg !14
+  %cmp = icmp sgt i32 %0, 0, !dbg !18 ; a > 0
+  br i1 %cmp, label %if.then, label %if.else, !dbg !19
+; CHECK: edge entry -> if.then probability is 0x0167ba82 / 0x80000000 = 1.10%
+; CHECK: edge entry -> if.else probability is 0x7e98457e / 0x80000000 = 98.90% [HOT edge]
+
+if.then:                                          ; preds = %entry
+  store i32 10, i32* %retval, align 4, !dbg !20 ; return 10 (a.b line 2)
+  br label %return, !dbg !20
+
+if.else:                                          ; preds = %entry
+  store i32 20, i32* %retval, align 4, !dbg !22 ; return 20 (a.b line 4)
+  br label %return, !dbg !22
+
+return:                                           ; preds = %if.else, %if.then
+  %1 = load i32, i32* %retval, align 4, !dbg !24
+  ret i32 %1, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250750)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "a.cc", directory: "/tmp")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 250750)"}
+!11 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 5, type: !7)
+!12 = !DIExpression()
+!13 = !DILocation(line: 5, column: 13, scope: !4)
+!14 = !DILocation(line: 1, column: 5, scope: !15)
+!15 = distinct !DILexicalBlock(scope: !17, file: !16, line: 1, column: 5)
+!16 = !DIFile(filename: "./a.b", directory: "/tmp")
+!17 = !DILexicalBlockFile(scope: !4, file: !16, discriminator: 0)
+!18 = !DILocation(line: 1, column: 7, scope: !15)
+!19 = !DILocation(line: 1, column: 5, scope: !17)
+!20 = !DILocation(line: 2, column: 3, scope: !21)
+!21 = distinct !DILexicalBlock(scope: !15, file: !16, line: 1, column: 12)
+!22 = !DILocation(line: 4, column: 3, scope: !23)
+!23 = distinct !DILexicalBlock(scope: !15, file: !16, line: 3, column: 8)
+!24 = !DILocation(line: 7, column: 1, scope: !25)
+!25 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 0)

Added: llvm/trunk/test/Transforms/SampleProfile/propagate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/propagate.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/propagate.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/propagate.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,317 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ code for this test case:
+;
+; #include <stdio.h>
+;
+; long foo(int x, int y, long N) {
+;   if (x < y) {
+;     return y - x;
+;   } else {
+;     for (long i = 0; i < N; i++) {
+;       if (i > N / 3)
+;         x--;
+;       if (i > N / 4) {
+;         y++;
+;         x += 3;
+;       } else {
+;         for (long long j = 0; j < 100; j++) {
+;           x += j;
+;           y -= 3;
+;         }
+;       }
+;     }
+;   }
+;   return y * x;
+; }
+;
+; int main() {
+;   int x = 5678;
+;   int y = 1234;
+;   long N = 9999999;
+;   printf("foo(%d, %d, %ld) = %ld\n", x, y, N, foo(x, y, N));
+;   return 0;
+; }
+
+; ModuleID = 'propagate.cc'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [24 x i8] c"foo(%d, %d, %ld) = %ld\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i64 @_Z3fooiil(i32 %x, i32 %y, i64 %N) #0 !dbg !6 {  ; unoptimized IR for foo(x, y, N) from the C++ listing in this file's header
+entry:  ; spill the three arguments to stack slots, then branch on x < y
+  %retval = alloca i64, align 8  ; return-value slot: stored in if.then / if.end20, loaded in return
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  %N.addr = alloca i64, align 8
+  %i = alloca i64, align 8  ; outer loop counter
+  %j = alloca i64, align 8  ; inner loop counter (64-bit, compared signed in for.cond8)
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !11, metadata !12), !dbg !13
+  store i32 %y, i32* %y.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %y.addr, metadata !14, metadata !12), !dbg !15
+  store i64 %N, i64* %N.addr, align 8
+  call void @llvm.dbg.declare(metadata i64* %N.addr, metadata !16, metadata !12), !dbg !17
+  %0 = load i32, i32* %x.addr, align 4, !dbg !18
+  %1 = load i32, i32* %y.addr, align 4, !dbg !20
+  %cmp = icmp slt i32 %0, %1, !dbg !21  ; x < y
+  br i1 %cmp, label %if.then, label %if.else, !dbg !22
+
+if.then:                                          ; preds = %entry
+  %2 = load i32, i32* %y.addr, align 4, !dbg !23
+  %3 = load i32, i32* %x.addr, align 4, !dbg !25
+  %sub = sub nsw i32 %2, %3, !dbg !26  ; y - x
+  %conv = sext i32 %sub to i64, !dbg !23
+  store i64 %conv, i64* %retval, align 8, !dbg !27  ; retval = (i64)(y - x)
+  br label %return, !dbg !27
+
+if.else:                                          ; preds = %entry
+  call void @llvm.dbg.declare(metadata i64* %i, metadata !28, metadata !12), !dbg !31
+  store i64 0, i64* %i, align 8, !dbg !31  ; i = 0
+  br label %for.cond, !dbg !32
+
+for.cond:                                         ; preds = %for.inc17, %if.else
+  %4 = load i64, i64* %i, align 8, !dbg !33
+  %5 = load i64, i64* %N.addr, align 8, !dbg !36
+  %cmp1 = icmp slt i64 %4, %5, !dbg !37  ; outer loop test: i < N
+  br i1 %cmp1, label %for.body, label %for.end19, !dbg !38
+
+for.body:                                         ; preds = %for.cond
+  %6 = load i64, i64* %i, align 8, !dbg !39
+  %7 = load i64, i64* %N.addr, align 8, !dbg !42
+  %div = sdiv i64 %7, 3, !dbg !43
+  %cmp2 = icmp sgt i64 %6, %div, !dbg !44  ; i > N / 3
+  br i1 %cmp2, label %if.then3, label %if.end, !dbg !45
+; CHECK:  edge for.body -> if.then3 probability is 0x51292fa6 / 0x80000000 = 63.41%
+; CHECK:  edge for.body -> if.end probability is 0x2ed6d05a / 0x80000000 = 36.59%
+
+if.then3:                                         ; preds = %for.body
+  %8 = load i32, i32* %x.addr, align 4, !dbg !46
+  %dec = add nsw i32 %8, -1, !dbg !46  ; x - 1
+  store i32 %dec, i32* %x.addr, align 4, !dbg !46
+  br label %if.end, !dbg !47
+
+if.end:                                           ; preds = %if.then3, %for.body
+  %9 = load i64, i64* %i, align 8, !dbg !48
+  %10 = load i64, i64* %N.addr, align 8, !dbg !50
+  %div4 = sdiv i64 %10, 4, !dbg !51
+  %cmp5 = icmp sgt i64 %9, %div4, !dbg !52  ; i > N / 4
+  br i1 %cmp5, label %if.then6, label %if.else7, !dbg !53
+; CHECK:  edge if.end -> if.then6 probability is 0x5d89d89e / 0x80000000 = 73.08%
+; CHECK:  edge if.end -> if.else7 probability is 0x22762762 / 0x80000000 = 26.92%
+
+if.then6:                                         ; preds = %if.end
+  %11 = load i32, i32* %y.addr, align 4, !dbg !54
+  %inc = add nsw i32 %11, 1, !dbg !54  ; y + 1
+  store i32 %inc, i32* %y.addr, align 4, !dbg !54
+  %12 = load i32, i32* %x.addr, align 4, !dbg !56
+  %add = add nsw i32 %12, 3, !dbg !56  ; x + 3
+  store i32 %add, i32* %x.addr, align 4, !dbg !56
+  br label %if.end16, !dbg !57
+
+if.else7:                                         ; preds = %if.end
+  call void @llvm.dbg.declare(metadata i64* %j, metadata !58, metadata !12), !dbg !62
+  store i64 0, i64* %j, align 8, !dbg !62  ; j = 0
+  br label %for.cond8, !dbg !63
+
+for.cond8:                                        ; preds = %for.inc, %if.else7
+  %13 = load i64, i64* %j, align 8, !dbg !64
+  %cmp9 = icmp slt i64 %13, 100, !dbg !67  ; inner loop test: j < 100
+  br i1 %cmp9, label %for.body10, label %for.end, !dbg !68
+; CHECK: edge for.cond8 -> for.body10 probability is 0x7e941a89 / 0x80000000 = 98.89% [HOT edge]
+; CHECK: edge for.cond8 -> for.end probability is 0x016be577 / 0x80000000 = 1.11%
+
+
+for.body10:                                       ; preds = %for.cond8
+  %14 = load i64, i64* %j, align 8, !dbg !69
+  %15 = load i32, i32* %x.addr, align 4, !dbg !71
+  %conv11 = sext i32 %15 to i64, !dbg !71  ; widen x so the add with j happens in i64
+  %add12 = add nsw i64 %conv11, %14, !dbg !71
+  %conv13 = trunc i64 %add12 to i32, !dbg !71  ; narrow the sum back to i32 before storing to x
+  store i32 %conv13, i32* %x.addr, align 4, !dbg !71
+  %16 = load i32, i32* %y.addr, align 4, !dbg !72
+  %sub14 = sub nsw i32 %16, 3, !dbg !72  ; y - 3
+  store i32 %sub14, i32* %y.addr, align 4, !dbg !72
+  br label %for.inc, !dbg !73
+
+for.inc:                                          ; preds = %for.body10
+  %17 = load i64, i64* %j, align 8, !dbg !74
+  %inc15 = add nsw i64 %17, 1, !dbg !74  ; j + 1
+  store i64 %inc15, i64* %j, align 8, !dbg !74
+  br label %for.cond8, !dbg !76
+
+for.end:                                          ; preds = %for.cond8
+  br label %if.end16
+
+if.end16:                                         ; preds = %for.end, %if.then6
+  br label %for.inc17, !dbg !77
+
+for.inc17:                                        ; preds = %if.end16
+  %18 = load i64, i64* %i, align 8, !dbg !78
+  %inc18 = add nsw i64 %18, 1, !dbg !78  ; i + 1
+  store i64 %inc18, i64* %i, align 8, !dbg !78
+  br label %for.cond, !dbg !80
+
+for.end19:                                        ; preds = %for.cond
+  br label %if.end20
+
+if.end20:                                         ; preds = %for.end19
+  %19 = load i32, i32* %y.addr, align 4, !dbg !81
+  %20 = load i32, i32* %x.addr, align 4, !dbg !82
+  %mul = mul nsw i32 %19, %20, !dbg !83  ; y * x
+  %conv21 = sext i32 %mul to i64, !dbg !81
+  store i64 %conv21, i64* %retval, align 8, !dbg !84  ; retval = (i64)(y * x)
+  br label %return, !dbg !84
+
+return:                                           ; preds = %if.end20, %if.then
+  %21 = load i64, i64* %retval, align 8, !dbg !85  ; single exit: reload whichever value was stored to retval
+  ret i64 %21, !dbg !85
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: norecurse uwtable
+define i32 @main() #2 !dbg !86 {  ; driver: initializes x, y, N, calls @_Z3fooiil and prints the result via @printf
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %N = alloca i64, align 8
+  store i32 0, i32* %retval, align 4
+  call void @llvm.dbg.declare(metadata i32* %x, metadata !89, metadata !12), !dbg !90
+  store i32 5678, i32* %x, align 4, !dbg !90  ; x = 5678
+  call void @llvm.dbg.declare(metadata i32* %y, metadata !91, metadata !12), !dbg !92
+  store i32 1234, i32* %y, align 4, !dbg !92  ; y = 1234
+  call void @llvm.dbg.declare(metadata i64* %N, metadata !93, metadata !12), !dbg !94
+  store i64 9999999, i64* %N, align 8, !dbg !94  ; N = 9999999
+  %0 = load i32, i32* %x, align 4, !dbg !95  ; %0..%2: x, y, N as passed directly to printf
+  %1 = load i32, i32* %y, align 4, !dbg !96
+  %2 = load i64, i64* %N, align 8, !dbg !97
+  %3 = load i32, i32* %x, align 4, !dbg !98  ; %3..%5: second set of loads, used as the arguments to foo
+  %4 = load i32, i32* %y, align 4, !dbg !99
+  %5 = load i64, i64* %N, align 8, !dbg !100
+  %call = call i64 @_Z3fooiil(i32 %3, i32 %4, i64 %5), !dbg !101
+  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i64 %2, i64 %call), !dbg !102
+  ret i32 0, !dbg !104
+}
+
+declare i32 @printf(i8*, ...) #3
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 266819)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "propagate.cc", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.9.0 (trunk 266819)"}
+!6 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooiil", scope: !1, file: !1, line: 3, type: !7, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !10, !10, !9}
+!9 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "x", arg: 1, scope: !6, file: !1, line: 3, type: !10)
+!12 = !DIExpression()
+!13 = !DILocation(line: 3, column: 14, scope: !6)
+!14 = !DILocalVariable(name: "y", arg: 2, scope: !6, file: !1, line: 3, type: !10)
+!15 = !DILocation(line: 3, column: 21, scope: !6)
+!16 = !DILocalVariable(name: "N", arg: 3, scope: !6, file: !1, line: 3, type: !9)
+!17 = !DILocation(line: 3, column: 29, scope: !6)
+!18 = !DILocation(line: 4, column: 7, scope: !19)
+!19 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 7)
+!20 = !DILocation(line: 4, column: 11, scope: !19)
+!21 = !DILocation(line: 4, column: 9, scope: !19)
+!22 = !DILocation(line: 4, column: 7, scope: !6)
+!23 = !DILocation(line: 5, column: 12, scope: !24)
+!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 4, column: 14)
+!25 = !DILocation(line: 5, column: 16, scope: !24)
+!26 = !DILocation(line: 5, column: 14, scope: !24)
+!27 = !DILocation(line: 5, column: 5, scope: !24)
+!28 = !DILocalVariable(name: "i", scope: !29, file: !1, line: 7, type: !9)
+!29 = distinct !DILexicalBlock(scope: !30, file: !1, line: 7, column: 5)
+!30 = distinct !DILexicalBlock(scope: !19, file: !1, line: 6, column: 10)
+!31 = !DILocation(line: 7, column: 15, scope: !29)
+!32 = !DILocation(line: 7, column: 10, scope: !29)
+!33 = !DILocation(line: 7, column: 22, scope: !34)
+!34 = !DILexicalBlockFile(scope: !35, file: !1, discriminator: 2)
+!35 = distinct !DILexicalBlock(scope: !29, file: !1, line: 7, column: 5)
+!36 = !DILocation(line: 7, column: 26, scope: !34)
+!37 = !DILocation(line: 7, column: 24, scope: !34)
+!38 = !DILocation(line: 7, column: 5, scope: !34)
+!39 = !DILocation(line: 8, column: 11, scope: !40)
+!40 = distinct !DILexicalBlock(scope: !41, file: !1, line: 8, column: 11)
+!41 = distinct !DILexicalBlock(scope: !35, file: !1, line: 7, column: 34)
+!42 = !DILocation(line: 8, column: 15, scope: !40)
+!43 = !DILocation(line: 8, column: 17, scope: !40)
+!44 = !DILocation(line: 8, column: 13, scope: !40)
+!45 = !DILocation(line: 8, column: 11, scope: !41)
+!46 = !DILocation(line: 9, column: 10, scope: !40)
+!47 = !DILocation(line: 9, column: 9, scope: !40)
+!48 = !DILocation(line: 10, column: 11, scope: !49)
+!49 = distinct !DILexicalBlock(scope: !41, file: !1, line: 10, column: 11)
+!50 = !DILocation(line: 10, column: 15, scope: !49)
+!51 = !DILocation(line: 10, column: 17, scope: !49)
+!52 = !DILocation(line: 10, column: 13, scope: !49)
+!53 = !DILocation(line: 10, column: 11, scope: !41)
+!54 = !DILocation(line: 11, column: 10, scope: !55)
+!55 = distinct !DILexicalBlock(scope: !49, file: !1, line: 10, column: 22)
+!56 = !DILocation(line: 12, column: 11, scope: !55)
+!57 = !DILocation(line: 13, column: 7, scope: !55)
+!58 = !DILocalVariable(name: "j", scope: !59, file: !1, line: 14, type: !61)
+!59 = distinct !DILexicalBlock(scope: !60, file: !1, line: 14, column: 9)
+!60 = distinct !DILexicalBlock(scope: !49, file: !1, line: 13, column: 14)
+!61 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!62 = !DILocation(line: 14, column: 24, scope: !59)
+!63 = !DILocation(line: 14, column: 14, scope: !59)
+!64 = !DILocation(line: 14, column: 31, scope: !65)
+!65 = !DILexicalBlockFile(scope: !66, file: !1, discriminator: 2)
+!66 = distinct !DILexicalBlock(scope: !59, file: !1, line: 14, column: 9)
+!67 = !DILocation(line: 14, column: 33, scope: !65)
+!68 = !DILocation(line: 14, column: 9, scope: !65)
+!69 = !DILocation(line: 15, column: 16, scope: !70)
+!70 = distinct !DILexicalBlock(scope: !66, file: !1, line: 14, column: 45)
+!71 = !DILocation(line: 15, column: 13, scope: !70)
+!72 = !DILocation(line: 16, column: 13, scope: !70)
+!73 = !DILocation(line: 17, column: 9, scope: !70)
+!74 = !DILocation(line: 14, column: 41, scope: !75)
+!75 = !DILexicalBlockFile(scope: !66, file: !1, discriminator: 4)
+!76 = !DILocation(line: 14, column: 9, scope: !75)
+!77 = !DILocation(line: 19, column: 5, scope: !41)
+!78 = !DILocation(line: 7, column: 30, scope: !79)
+!79 = !DILexicalBlockFile(scope: !35, file: !1, discriminator: 4)
+!80 = !DILocation(line: 7, column: 5, scope: !79)
+!81 = !DILocation(line: 21, column: 10, scope: !6)
+!82 = !DILocation(line: 21, column: 14, scope: !6)
+!83 = !DILocation(line: 21, column: 12, scope: !6)
+!84 = !DILocation(line: 21, column: 3, scope: !6)
+!85 = !DILocation(line: 22, column: 1, scope: !6)
+!86 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 24, type: !87, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
+!87 = !DISubroutineType(types: !88)
+!88 = !{!10}
+!89 = !DILocalVariable(name: "x", scope: !86, file: !1, line: 25, type: !10)
+!90 = !DILocation(line: 25, column: 7, scope: !86)
+!91 = !DILocalVariable(name: "y", scope: !86, file: !1, line: 26, type: !10)
+!92 = !DILocation(line: 26, column: 7, scope: !86)
+!93 = !DILocalVariable(name: "N", scope: !86, file: !1, line: 27, type: !9)
+!94 = !DILocation(line: 27, column: 8, scope: !86)
+!95 = !DILocation(line: 28, column: 38, scope: !86)
+!96 = !DILocation(line: 28, column: 41, scope: !86)
+!97 = !DILocation(line: 28, column: 44, scope: !86)
+!98 = !DILocation(line: 28, column: 51, scope: !86)
+!99 = !DILocation(line: 28, column: 54, scope: !86)
+!100 = !DILocation(line: 28, column: 57, scope: !86)
+!101 = !DILocation(line: 28, column: 47, scope: !86)
+!102 = !DILocation(line: 28, column: 3, scope: !103)
+!103 = !DILexicalBlockFile(scope: !86, file: !1, discriminator: 2)
+!104 = !DILocation(line: 29, column: 3, scope: !86)

Added: llvm/trunk/test/Transforms/SampleProfile/remap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/remap.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/remap.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/remap.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,60 @@
+; RUN: opt %s -passes=sample-profile -sample-profile-file=%S/Inputs/remap.prof -sample-profile-remapping-file=%S/Inputs/remap.map | opt -analyze -branch-prob | FileCheck %s
+
+; Reduced from branch.ll
+
+declare i1 @foo()
+
+define void @_ZN3foo3barERKN1M1XINS_6detail3quxEEE() !dbg !2 {  ; reduced control-flow skeleton; every condition is an opaque call to @foo
+; CHECK: Printing analysis 'Branch Probability Analysis' for function '_ZN3foo3barERKN1M1XINS_6detail3quxEEE':
+
+entry:
+  %cmp = call i1 @foo(), !dbg !6  ; only the call carries a debug location (!6 = line 5); the branch below has none
+  br i1 %cmp, label %if.then, label %if.end
+; CHECK:  edge entry -> if.then probability is 0x4ccf6b16 / 0x80000000 = 60.01%
+; CHECK:  edge entry -> if.end probability is 0x333094ea / 0x80000000 = 39.99%
+
+if.then:
+  br label %return
+
+if.end:
+  %cmp1 = call i1 @foo(), !dbg !7  ; debug location !7 = line 8
+  br i1 %cmp1, label %if.then.2, label %if.else
+; CHECK: edge if.end -> if.then.2 probability is 0x6652c748 / 0x80000000 = 79.94%
+; CHECK: edge if.end -> if.else probability is 0x19ad38b8 / 0x80000000 = 20.06%
+
+if.then.2:
+  call i1 @foo(), !dbg !8  ; result unused; the call only anchors debug location !8 (line 10) in this block
+  br label %for.cond
+
+for.cond:
+  %cmp5 = call i1 @foo()  ; no debug location on this call
+  br i1 %cmp5, label %for.body, label %for.end, !prof !9  ; pre-existing branch weights: !9 is 90 (taken) / 10 (not taken)
+; CHECK: edge for.cond -> for.body probability is 0x73333333 / 0x80000000 = 90.00%
+; CHECK: edge for.cond -> for.end probability is 0x0ccccccd / 0x80000000 = 10.00%
+
+for.body:
+  br label %for.cond
+
+for.end:
+  br label %return
+
+if.else:
+  br label %return
+
+return:
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!4, !5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "foo++", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !{}, retainedTypes: !{})
+!1 = !DIFile(filename: "test.cc", directory: "/foo/bar")
+!2 = distinct !DISubprogram(name: "_ZN3foo3barERKN1M1XINS_6detail3quxEEE", scope: !1, file: !1, line: 4, type: !3, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !{})
+!3 = !DISubroutineType(types: !{})
+!4 = !{i32 2, !"Dwarf Version", i32 4}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = !DILocation(line: 5, column: 8, scope: !2)
+!7 = !DILocation(line: 8, column: 6, scope: !2)
+!8 = !DILocation(line: 10, column: 11, scope: !2)
+!9 = !{!"branch_weights", i32 90, i32 10}

Added: llvm/trunk/test/Transforms/SampleProfile/remarks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/remarks.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/remarks.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/remarks.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,225 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/remarks.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s
+; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/remarks.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s
+; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml
+
+; Original test case.
+;
+;     1    #include <stdlib.h>
+;     2
+;     3    long long foo() {
+;     4      long long int sum = 0;
+;     5      for (int i = 0; i < 500000000; i++)
+;     6        if (i < 1000)
+;     7          sum -= i;
+;     8        else
+;     9          sum += -i * rand();
+;    10      return sum;
+;    11    }
+;    12
+;    13    int main() { return foo() > 0; }
+
+; We are expecting foo() to be inlined in main() (almost all the cycles are
+; spent inside foo).
+; CHECK: remark: remarks.cc:13:21: inlined hot callee '_Z3foov' into 'main'
+
+; The back edge for the loop is the hottest edge in the loop subgraph.
+; CHECK: remark: remarks.cc:6:9: most popular destination for conditional branches at remarks.cc:5:3
+
+; The predicate almost always chooses the 'else' branch.
+; CHECK: remark: remarks.cc:9:15: most popular destination for conditional branches at remarks.cc:6:9
+
+; Checking to see if YAML file is generated and contains remarks
+;YAML:       --- !Passed
+;YAML-NEXT:  Pass:            sample-profile
+;YAML-NEXT:  Name:            HotInline
+;YAML-NEXT:  DebugLoc:        { File: remarks.cc, Line: 13, Column: 21 }
+;YAML-NEXT:  Function:        main
+;YAML-NEXT:  Args:
+;YAML-NEXT:    - String:          'inlined hot callee '''
+;YAML-NEXT:    - Callee:          _Z3foov
+;YAML-NEXT:      DebugLoc:        { File: remarks.cc, Line: 3, Column: 0 }
+;YAML-NEXT:    - String:          ''' into '''
+;YAML-NEXT:    - Caller:          main
+;YAML-NEXT:        DebugLoc:        { File: remarks.cc, Line: 13, Column: 0 }
+;YAML-NEXT:    - String:          ''''
+;YAML-NEXT:  ...
+;YAML:  --- !Analysis
+;YAML-NEXT:  Pass:            sample-profile
+;YAML-NEXT:  Name:            AppliedSamples
+;YAML-NEXT:  DebugLoc:        { File: remarks.cc, Line: 5, Column: 8 }
+;YAML-NEXT:  Function:        main
+;YAML-NEXT:  Args:
+;YAML-NEXT:    - String:          'Applied '
+;YAML-NEXT:    - NumSamples:      '18305'
+;YAML-NEXT:    - String:          ' samples from profile (offset: '
+;YAML-NEXT:    - LineOffset:      '2'
+;YAML-NEXT:    - String:          ')'
+;YAML-NEXT:  ...
+;YAML:  --- !Passed
+;YAML-NEXT:  Pass:            sample-profile
+;YAML-NEXT:  Name:            PopularDest
+;YAML-NEXT:  DebugLoc:        { File: remarks.cc, Line: 6, Column: 9 }
+;YAML-NEXT:  Function:        main
+;YAML-NEXT:  Args:
+;YAML-NEXT:    - String:          'most popular destination for conditional branches at '
+;YAML-NEXT:    - CondBranchesLoc: 'remarks.cc:5:3'
+;YAML-NEXT:      DebugLoc:        { File: remarks.cc, Line: 5, Column: 3 }
+;YAML-NEXT:  ...
+
+; Function Attrs: nounwind uwtable
+define i64 @_Z3foov() #0 !dbg !4 {  ; IR for foo() from the numbered C++ listing in this file's header
+entry:
+  %sum = alloca i64, align 8  ; accumulator returned at the end
+  %i = alloca i32, align 4  ; loop counter
+  %0 = bitcast i64* %sum to i8*, !dbg !19
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %0) #4, !dbg !19
+  call void @llvm.dbg.declare(metadata i64* %sum, metadata !9, metadata !20), !dbg !21
+  store i64 0, i64* %sum, align 8, !dbg !21, !tbaa !22  ; sum = 0
+  %1 = bitcast i32* %i to i8*, !dbg !26
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %1) #4, !dbg !26
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !10, metadata !20), !dbg !27
+  store i32 0, i32* %i, align 4, !dbg !27, !tbaa !28  ; i = 0
+  br label %for.cond, !dbg !26
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %2 = load i32, i32* %i, align 4, !dbg !30, !tbaa !28
+  %cmp = icmp slt i32 %2, 500000000, !dbg !34  ; loop test: i < 500000000
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !35
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  %3 = bitcast i32* %i to i8*, !dbg !36
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %3) #4, !dbg !36  ; loop exited: end the lifetime of i
+  br label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %4 = load i32, i32* %i, align 4, !dbg !38, !tbaa !28
+  %cmp1 = icmp slt i32 %4, 1000, !dbg !40  ; i < 1000
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !41
+
+if.then:                                          ; preds = %for.body
+  %5 = load i32, i32* %i, align 4, !dbg !42, !tbaa !28
+  %conv = sext i32 %5 to i64, !dbg !42
+  %6 = load i64, i64* %sum, align 8, !dbg !43, !tbaa !22
+  %sub = sub nsw i64 %6, %conv, !dbg !43  ; sum - i
+  store i64 %sub, i64* %sum, align 8, !dbg !43, !tbaa !22
+  br label %if.end, !dbg !44
+
+if.else:                                          ; preds = %for.body
+  %7 = load i32, i32* %i, align 4, !dbg !45, !tbaa !28
+  %sub2 = sub nsw i32 0, %7, !dbg !46  ; -i
+  %call = call i32 @rand() #4, !dbg !47
+  %mul = mul nsw i32 %sub2, %call, !dbg !48  ; -i * rand(), computed in i32
+  %conv3 = sext i32 %mul to i64, !dbg !46
+  %8 = load i64, i64* %sum, align 8, !dbg !49, !tbaa !22
+  %add = add nsw i64 %8, %conv3, !dbg !49  ; sum + (i64)(-i * rand())
+  store i64 %add, i64* %sum, align 8, !dbg !49, !tbaa !22
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %for.inc, !dbg !50
+
+for.inc:                                          ; preds = %if.end
+  %9 = load i32, i32* %i, align 4, !dbg !51, !tbaa !28
+  %inc = add nsw i32 %9, 1, !dbg !51  ; i + 1
+  store i32 %inc, i32* %i, align 4, !dbg !51, !tbaa !28
+  br label %for.cond, !dbg !52
+
+for.end:                                          ; preds = %for.cond.cleanup
+  %10 = load i64, i64* %sum, align 8, !dbg !53, !tbaa !22  ; read the result before ending the lifetime of sum
+  %11 = bitcast i64* %sum to i8*, !dbg !54
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %11) #4, !dbg !54
+  ret i64 %10, !dbg !55
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
+
+; Function Attrs: nounwind
+declare i32 @rand() #3
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !dbg !13 {  ; returns foo() > 0 as an i32
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  %call = call i64 @_Z3foov(), !dbg !56  ; call site at line 13, column 21 (see !56)
+  %cmp = icmp sgt i64 %call, 0, !dbg !57
+  %conv = zext i1 %cmp to i32, !dbg !56  ; widen the i1 comparison result to the i32 return type
+  ret i32 %conv, !dbg !58
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind argmemonly }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "remarks.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !{!9, !10}
+!9 = !DILocalVariable(name: "sum", scope: !4, file: !1, line: 4, type: !7)
+!10 = !DILocalVariable(name: "i", scope: !11, file: !1, line: 5, type: !12)
+!11 = distinct !DILexicalBlock(scope: !4, file: !1, line: 5, column: 3)
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!12}
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 3}
+!18 = !{!"clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)"}
+!19 = !DILocation(line: 4, column: 3, scope: !4)
+!20 = !DIExpression()
+!21 = !DILocation(line: 4, column: 17, scope: !4)
+!22 = !{!23, !23, i64 0}
+!23 = !{!"long long", !24, i64 0}
+!24 = !{!"omnipotent char", !25, i64 0}
+!25 = !{!"Simple C/C++ TBAA"}
+!26 = !DILocation(line: 5, column: 8, scope: !11)
+!27 = !DILocation(line: 5, column: 12, scope: !11)
+!28 = !{!29, !29, i64 0}
+!29 = !{!"int", !24, i64 0}
+!30 = !DILocation(line: 5, column: 19, scope: !31)
+!31 = !DILexicalBlockFile(scope: !32, file: !1, discriminator: 3)
+!32 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 1)
+!33 = distinct !DILexicalBlock(scope: !11, file: !1, line: 5, column: 3)
+!34 = !DILocation(line: 5, column: 21, scope: !33)
+!35 = !DILocation(line: 5, column: 3, scope: !11)
+!36 = !DILocation(line: 5, column: 3, scope: !37)
+!37 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 2)
+!38 = !DILocation(line: 6, column: 9, scope: !39)
+!39 = distinct !DILexicalBlock(scope: !33, file: !1, line: 6, column: 9)
+!40 = !DILocation(line: 6, column: 11, scope: !39)
+!41 = !DILocation(line: 6, column: 9, scope: !33)
+!42 = !DILocation(line: 7, column: 14, scope: !39)
+!43 = !DILocation(line: 7, column: 11, scope: !39)
+!44 = !DILocation(line: 7, column: 7, scope: !39)
+!45 = !DILocation(line: 9, column: 15, scope: !39)
+!46 = !DILocation(line: 9, column: 14, scope: !39)
+!47 = !DILocation(line: 9, column: 19, scope: !39)
+!48 = !DILocation(line: 9, column: 17, scope: !39)
+!49 = !DILocation(line: 9, column: 11, scope: !39)
+!50 = !DILocation(line: 6, column: 13, scope: !39)
+!51 = !DILocation(line: 5, column: 35, scope: !33)
+!52 = !DILocation(line: 5, column: 3, scope: !33)
+!53 = !DILocation(line: 10, column: 10, scope: !4)
+!54 = !DILocation(line: 11, column: 1, scope: !4)
+!55 = !DILocation(line: 10, column: 3, scope: !4)
+!56 = !DILocation(line: 13, column: 21, scope: !13)
+!57 = !DILocation(line: 13, column: 27, scope: !13)
+!58 = !DILocation(line: 13, column: 14, scope: !13)

Added: llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
+
+target triple = "x86_64-pc-linux-gnu"
+
+; This test checks that a function without a profile gets the unlikely section
+; prefix if -profile-sample-accurate is specified or the function has the
+; profile-sample-accurate attribute.
+
+declare void @hot_func()
+
+; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
+; CHECK: foo_not_in_profile{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
+; ACCURATE: foo_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
+; The function not appearing in profile is cold when -profile-sample-accurate
+; is on.
+define void @foo_not_in_profile() {  ; declared with no attribute group, unlike @bar_not_in_profile
+  call void @hot_func()
+  ret void
+}
+
+; CHECK: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
+; ACCURATE: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
+; The function not appearing in profile is cold when the func has
+; profile-sample-accurate attribute.
+define void @bar_not_in_profile() #0 {  ; #0 is the "profile-sample-accurate" attribute group defined below
+  call void @hot_func()
+  ret void
+}
+
+attributes #0 = { "profile-sample-accurate" }
+
+; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
+; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}

Added: llvm/trunk/test/Transforms/SampleProfile/summary.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/summary.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/summary.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/summary.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; Test that the entire program's profile summary is annotated in the IR.
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/summary.prof -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/summary.prof -S | opt -sample-profile -sample-profile-file=%S/Inputs/summary.prof -S | FileCheck %s
+
+define i32 @bar() #0 !dbg !1 {
+entry:
+  ret i32 1, !dbg !2
+}
+
+define i32 @baz() !dbg !3 {
+entry:
+    %call = call i32 @bar(), !dbg !4
+    ret i32 %call, !dbg !5
+}
+
+; CHECK-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
+; CHECK-DAG: {{![0-9]+}} = !{!"TotalCount", i64 900}
+; CHECK-DAG: {{![0-9]+}} = !{!"NumCounts", i64 5}
+; CHECK-DAG: {{![0-9]+}} = !{!"NumFunctions", i64 3}
+; CHECK-DAG: {{![0-9]+}} = !{!"MaxFunctionCount", i64 3}
+
+!1 = distinct !DISubprogram(name: "bar")
+!2 = !DILocation(line: 2, scope: !2)
+!3 = distinct !DISubprogram(name: "baz")
+!4 = !DILocation(line: 1, scope: !4)
+!5 = !DILocation(line: 2, scope: !5)

Added: llvm/trunk/test/Transforms/SampleProfile/syntax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/syntax.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/syntax.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/syntax.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=missing.prof 2>&1 | FileCheck -check-prefix=MISSING-FILE %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_fn_header.prof 2>&1 | FileCheck -check-prefix=BAD-FN-HEADER %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_sample_line.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLE-LINE %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_line_values.prof 2>&1 | FileCheck -check-prefix=BAD-LINE-VALUES %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_discriminator_value.prof 2>&1 | FileCheck -check-prefix=BAD-DISCRIMINATOR-VALUE %s
+; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_samples.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLES %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_mangle.prof 2>&1 >/dev/null
+
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=missing.prof 2>&1 | FileCheck -check-prefix=MISSING-FILE %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_fn_header.prof 2>&1 | FileCheck -check-prefix=BAD-FN-HEADER %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_sample_line.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLE-LINE %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_line_values.prof 2>&1 | FileCheck -check-prefix=BAD-LINE-VALUES %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_discriminator_value.prof 2>&1 | FileCheck -check-prefix=BAD-DISCRIMINATOR-VALUE %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_samples.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLES %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_mangle.prof 2>&1 >/dev/null
+
+define void @empty() {
+entry:
+  ret void
+}
+; NO-DEBUG: warning: No debug information found in function empty: Function profile not used
+; MISSING-FILE: missing.prof: Could not open profile:
+; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof: Could not open profile: Unrecognized sample profile encoding format
+; BAD-SAMPLE-LINE: error: {{.*}}bad_sample_line.prof:3: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1: BAD
+; BAD-LINE-VALUES: error: {{.*}}bad_line_values.prof:2: Expected 'mangled_name:NUM:NUM', found -1: 10
+; BAD-DISCRIMINATOR-VALUE: error: {{.*}}bad_discriminator_value.prof:2: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1.-3: 10
+; BAD-SAMPLES: error: {{.*}}bad_samples.prof:2: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1.3: -10

Added: llvm/trunk/test/Transforms/SampleProfile/warm-inline-instance.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/warm-inline-instance.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/warm-inline-instance.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/warm-inline-instance.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,115 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/warm-inline-instance.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/warm-inline-instance.prof -S | FileCheck %s
+
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @foo(i32 %x, i32 %y) !dbg !4 {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %t0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %t1 = load i32, i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %t0, %t1, !dbg !11
+  ret i32 %add, !dbg !11
+}
+
+define i32 @goo(i32 %x, i32 %y) {
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %t0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %t1 = load i32, i32* %y.addr, align 4, !dbg !11
+  %add = add nsw i32 %t0, %t1, !dbg !11
+  ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() !dbg !7 {
+entry:
+  %retval = alloca i32, align 4
+  %s = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4, !dbg !12
+  br label %while.cond, !dbg !13
+
+while.cond:                                       ; preds = %if.end, %entry
+  %t0 = load i32, i32* %i, align 4, !dbg !14
+  %inc = add nsw i32 %t0, 1, !dbg !14
+  store i32 %inc, i32* %i, align 4, !dbg !14
+  %cmp = icmp slt i32 %t0, 400000000, !dbg !14
+  br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body:                                       ; preds = %while.cond
+  %t1 = load i32, i32* %i, align 4, !dbg !16
+  %cmp1 = icmp ne i32 %t1, 100, !dbg !16
+  br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+if.then:                                          ; preds = %while.body
+  %t2 = load i32, i32* %i, align 4, !dbg !18
+  %t3 = load i32, i32* %s, align 4, !dbg !18
+; Although the ratio of @foo's total samples to @main's total samples is
+; small, the total sample count exceeds the hot cutoff computed by
+; ProfileSummaryInfo, so the callsite of @foo is still regarded as hot and
+; early inlining will inline it.
+; CHECK-LABEL: @main(
+; CHECK-NOT: call i32 @foo(i32 %t2, i32 %t3)
+  %call1 = call i32 @foo(i32 %t2, i32 %t3), !dbg !18
+  store i32 %call1, i32* %s, align 4, !dbg !18
+  br label %if.end, !dbg !18
+
+if.else:                                          ; preds = %while.body
+; The basic block containing the call to @goo gets no samples, so no profile will be annotated.
+; CHECK: call i32 @goo(i32 2, i32 3), !dbg !{{[0-9]+}}
+; CHECK-NOT: !prof
+; CHECK-SAME: {{$}}
+  %call2 = call i32 @goo(i32 2, i32 3), !dbg !26
+  store i32 %call2, i32* %s, align 4, !dbg !20
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  br label %while.cond, !dbg !22
+
+while.end:                                        ; preds = %while.cond
+  %t4 = load i32, i32* %s, align 4, !dbg !24
+  %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %t4), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)
+!26 = !DILocation(line: 11, scope: !19)

Added: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll (added)
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S %s -scalarize-masked-mem-intrin -mtriple=x86_64-linux-gnu | FileCheck %s
+
+define void @scalarize_v2i64(i64* %p, <2 x i1> %mask, <2 x i64> %data) {
+; CHECK-LABEL: @scalarize_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[COND_STORE:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.store:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
+; CHECK-NEXT:    store i64 [[TMP2]], i64* [[P:%.*]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.store1:
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
+; CHECK-NEXT:    store i64 [[TMP5]], i64* [[PTR_PHI_ELSE]], align 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> %mask)
+  ret void
+}
+
+define void @scalarize_v2i64_ones_mask(i64* %p, <2 x i64> %data) {
+; CHECK-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-NEXT:    br i1 true, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.store:
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[P:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    br i1 true, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.store1:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 true, i1 true>)
+  ret void
+}
+
+define void @scalarize_v2i64_zero_mask(i64* %p, <2 x i64> %data) {
+; CHECK-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-NEXT:    br i1 false, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.store:
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[P:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    br i1 false, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.store1:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 false, i1 false>)
+  ret void
+}
+
+define void @scalarize_v2i64_const_mask(i64* %p, <2 x i64> %data) {
+; CHECK-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-NEXT:    br i1 false, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.store:
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[P:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    br i1 true, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.store1:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 false, i1 true>)
+  ret void
+}
+
+declare void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64>, i64*, <2 x i1>)

Added: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll (added)
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S %s -scalarize-masked-mem-intrin -mtriple=x86_64-linux-gnu | FileCheck %s
+
+define <2 x i64> @scalarize_v2i64(i64* %p, <2 x i1> %mask, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.load:
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, i64* [[P:%.*]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP3]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP4]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
+; CHECK-NEXT:    br i1 [[TMP5]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.load1:
+; CHECK-NEXT:    [[TMP6:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP6]], i64 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP7]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
+;
+  %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> %mask, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @scalarize_v2i64_ones_mask(i64* %p, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-NEXT:    br i1 true, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.load:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
+; CHECK-NEXT:    br i1 true, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.load1:
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
+;
+  %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @scalarize_v2i64_zero_mask(i64* %p, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-NEXT:    br i1 false, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.load:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
+; CHECK-NEXT:    br i1 false, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.load1:
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
+;
+  %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @scalarize_v2i64_const_mask(i64* %p, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-NEXT:    br i1 false, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.load:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
+; CHECK-NEXT:    br i1 true, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.load1:
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
+;
+  %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+declare <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64*,  <2 x i1>, <2 x i64>)

Added: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll (added)
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-gather.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S %s -scalarize-masked-mem-intrin -mtriple=x86_64-linux-gnu | FileCheck %s
+
+define <2 x i64> @scalarize_v2i64(<2 x i64*> %p, <2 x i1> %mask, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64(
+; CHECK-NEXT:    [[MASK0:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
+; CHECK-NEXT:    br i1 [[MASK0]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.load:
+; CHECK-NEXT:    [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i64 0
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8
+; CHECK-NEXT:    [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i64 0
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[RES0]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[MASK1:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
+; CHECK-NEXT:    br i1 [[MASK1]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.load1:
+; CHECK-NEXT:    [[PTR1:%.*]] = extractelement <2 x i64*> [[P]], i64 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
+; CHECK-NEXT:    [[RES1:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[LOAD1]], i64 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[RES1]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
+;
+  %ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> %mask, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @scalarize_v2i64_ones_mask(<2 x i64*> %p, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-NEXT:    [[PTR0:%.*]] = extractelement <2 x i64*> [[P:%.*]], i64 0
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i64, i64* [[PTR0]], align 8
+; CHECK-NEXT:    [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i64 0
+; CHECK-NEXT:    [[PTR1:%.*]] = extractelement <2 x i64*> [[P]], i64 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
+; CHECK-NEXT:    [[RES1:%.*]] = insertelement <2 x i64> [[RES0]], i64 [[LOAD1]], i64 1
+; CHECK-NEXT:    ret <2 x i64> [[RES1]]
+;
+  %ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @scalarize_v2i64_zero_mask(<2 x i64*> %p, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-NEXT:    ret <2 x i64> [[PASSTHRU:%.*]]
+;
+  %ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @scalarize_v2i64_const_mask(<2 x i64*> %p, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-NEXT:    [[PTR1:%.*]] = extractelement <2 x i64*> [[P:%.*]], i64 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, i64* [[PTR1]], align 8
+; CHECK-NEXT:    [[RES1:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD1]], i64 1
+; CHECK-NEXT:    ret <2 x i64> [[RES1]]
+;
+  %ret = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)

Added: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll (added)
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S %s -scalarize-masked-mem-intrin -mtriple=x86_64-linux-gnu | FileCheck %s
+
+define <2 x i64> @scalarize_v2i64(<2 x i64>* %p, <2 x i1> %mask, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.load:
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP4]], i64 0
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
+; CHECK-NEXT:    br i1 [[TMP6]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.load1:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP8]], i64 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i64> [[RES_PHI_ELSE3]]
+;
+  %ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 128, <2 x i1> %mask, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @scalarize_v2i64_ones_mask(<2 x i64>* %p, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[P:%.*]], align 8
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @scalarize_v2i64_zero_mask(<2 x i64>* %p, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
+; CHECK-NEXT:    ret <2 x i64> [[PASSTHRU:%.*]]
+;
+  %ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @scalarize_v2i64_const_mask(<2 x i64>* %p, <2 x i64> %passthru) {
+; CHECK-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP3]], i64 1
+; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
+;
+  %ret = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
+  ret <2 x i64> %ret
+}
+
+; This uses a byte-sized but non-power-of-2 element size. This used to crash due to a bad alignment calculation.
+define <2 x i24> @scalarize_v2i24(<2 x i24>* %p, <2 x i1> %mask, <2 x i24> %passthru) {
+; CHECK-LABEL: @scalarize_v2i24(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i24>* [[P:%.*]] to i24*
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.load:
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i24, i24* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = load i24, i24* [[TMP3]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
+; CHECK-NEXT:    br i1 [[TMP6]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.load1:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i24, i24* [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = load i24, i24* [[TMP7]], align 1
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP8]], i64 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i24> [[RES_PHI_ELSE3]]
+;
+  %ret = call <2 x i24> @llvm.masked.load.v2i24.p0v2i24(<2 x i24>* %p, i32 8, <2 x i1> %mask, <2 x i24> %passthru)
+  ret <2 x i24> %ret
+}
+
+; This uses a byte-sized but non-power-of-2 element size. This used to crash due to a bad alignment calculation.
+define <2 x i48> @scalarize_v2i48(<2 x i48>* %p, <2 x i1> %mask, <2 x i48> %passthru) {
+; CHECK-LABEL: @scalarize_v2i48(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i48>* [[P:%.*]] to i48*
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.load:
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i48, i48* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = load i48, i48* [[TMP3]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
+; CHECK-NEXT:    br i1 [[TMP6]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.load1:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i48, i48* [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = load i48, i48* [[TMP7]], align 2
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP8]], i64 1
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP9]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i48> [[RES_PHI_ELSE3]]
+;
+  %ret = call <2 x i48> @llvm.masked.load.v2i48.p0v2i48(<2 x i48>* %p, i32 16, <2 x i1> %mask, <2 x i48> %passthru)
+  ret <2 x i48> %ret
+}
+
+declare <2 x i24> @llvm.masked.load.v2i24.p0v2i24(<2 x i24>*, i32, <2 x i1>, <2 x i24>)
+declare <2 x i48> @llvm.masked.load.v2i48.p0v2i48(<2 x i48>*, i32, <2 x i1>, <2 x i48>)
+declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32, <2 x i1>, <2 x i64>)

Added: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll (added)
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S %s -scalarize-masked-mem-intrin -mtriple=x86_64-linux-gnu | FileCheck %s
+
+define void @scalarize_v2i64(<2 x i64>* %p, <2 x i1> %mask, <2 x i64> %data) {  ; variable mask: the checks expect one branch-guarded scalar store per lane
+; CHECK-LABEL: @scalarize_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[MASK:%.*]], i64 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[COND_STORE:%.*]], label [[ELSE:%.*]]
+; CHECK:       cond.store:
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[TMP4]], align 8
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
+; CHECK-NEXT:    br i1 [[TMP5]], label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
+; CHECK:       cond.store1:
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
+; CHECK-NEXT:    store i64 [[TMP6]], i64* [[TMP7]], align 8
+; CHECK-NEXT:    br label [[ELSE2]]
+; CHECK:       else2:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %data, <2 x i64>* %p, i32 128, <2 x i1> %mask)  ; alignment operand is 128, yet both expected scalar stores are "align 8"
+  ret void
+}
+
+define void @scalarize_v2i64_ones_mask(<2 x i64>* %p, <2 x i64> %data) {  ; all-true constant mask: the checks expect one ordinary vector store
+; CHECK-LABEL: @scalarize_v2i64_ones_mask(
+; CHECK-NEXT:    store <2 x i64> [[DATA:%.*]], <2 x i64>* [[P:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %data, <2 x i64>* %p, i32 8, <2 x i1> <i1 true, i1 true>)  ; mask is <true, true>
+  ret void
+}
+
+define void @scalarize_v2i64_zero_mask(<2 x i64>* %p, <2 x i64> %data) {  ; all-false constant mask: the checks expect no store, only a leftover bitcast
+; CHECK-LABEL: @scalarize_v2i64_zero_mask(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %data, <2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 false>)  ; mask is <false, false>
+  ret void
+}
+
+define void @scalarize_v2i64_const_mask(<2 x i64>* %p, <2 x i64> %data) {  ; mixed constant mask: the checks expect an unconditional store of lane 1 only
+; CHECK-LABEL: @scalarize_v2i64_const_mask(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64>* [[P:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 1
+; CHECK-NEXT:    store i64 [[TMP2]], i64* [[TMP3]], align 8
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %data, <2 x i64>* %p, i32 8, <2 x i1> <i1 false, i1 true>)  ; lane 0 masked off, lane 1 enabled
+  ret void
+}
+
+declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)

Added: llvm/trunk/test/Transforms/Scalarizer/basic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/basic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/basic.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/basic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,453 @@
+; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
+; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare <4 x float> @ext(<4 x float>)
+@g = global <4 x float> zeroinitializer
+
+define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {  ; float loop with a loop-carried vector accumulator; the checks expect per-lane scalars throughout
+; CHECK-LABEL: @f1(
+; CHECK: entry:
+; CHECK:   %init.i0 = extractelement <4 x float> %init, i32 0
+; CHECK:   %init.i1 = extractelement <4 x float> %init, i32 1
+; CHECK:   %init.i2 = extractelement <4 x float> %init, i32 2
+; CHECK:   %init.i3 = extractelement <4 x float> %init, i32 3
+; CHECK:   br label %loop
+; CHECK: loop:
+; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+; CHECK:   %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
+; CHECK:   %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
+; CHECK:   %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
+; CHECK:   %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
+; CHECK:   %nexti = sub i32 %i, 1
+; CHECK:   %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
+; CHECK:   %ptr.i0 = bitcast <4 x float>* %ptr to float*
+; CHECK:   %val.i0 = load float, float* %ptr.i0, align 16
+; CHECK:   %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
+; CHECK:   %val.i1 = load float, float* %ptr.i1, align 4
+; CHECK:   %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
+; CHECK:   %val.i2 = load float, float* %ptr.i2, align 8
+; CHECK:   %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
+; CHECK:   %val.i3 = load float, float* %ptr.i3, align 4
+; CHECK:   %add.i0 = fadd float %val.i0, %val.i2
+; CHECK:   %add.i1 = fadd float %val.i1, %val.i3
+; CHECK:   %add.i2 = fadd float %acc.i0, %acc.i2
+; CHECK:   %add.i3 = fadd float %acc.i1, %acc.i3
+; CHECK:   %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
+; CHECK:   %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
+; CHECK:   %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
+; CHECK:   %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
+; CHECK:   %call = call <4 x float> @ext(<4 x float> %add)
+; CHECK:   %call.i0 = extractelement <4 x float> %call, i32 0
+; CHECK:   %cmp.i0 = fcmp ogt float %call.i0, 1.0
+; CHECK:   %call.i1 = extractelement <4 x float> %call, i32 1
+; CHECK:   %cmp.i1 = fcmp ogt float %call.i1, 2.0
+; CHECK:   %call.i2 = extractelement <4 x float> %call, i32 2
+; CHECK:   %cmp.i2 = fcmp ogt float %call.i2, 3.0
+; CHECK:   %call.i3 = extractelement <4 x float> %call, i32 3
+; CHECK:   %cmp.i3 = fcmp ogt float %call.i3, 4.0
+; CHECK:   %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
+; CHECK:   %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
+; CHECK:   %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
+; CHECK:   %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
+; CHECK:   store float %sel.i0, float* %ptr.i0
+; CHECK:   store float %sel.i1, float* %ptr.i1
+; CHECK:   store float %sel.i2, float* %ptr.i2
+; CHECK:   store float %sel.i3, float* %ptr.i3
+; CHECK:   %test = icmp eq i32 %nexti, 0
+; CHECK:   br i1 %test, label %loop, label %exit
+; CHECK: exit:
+; CHECK:   ret void
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]  ; loop-carried vector value
+  %nexti = sub i32 %i, 1
+
+  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
+  %val = load <4 x float> , <4 x float> *%ptr
+  %dval = bitcast <4 x float> %val to <2 x double>  ; viewed as doubles so each shuffle below moves a pair of floats
+  %dacc = bitcast <4 x float> %acc to <2 x double>
+  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
+                            <2 x i32> <i32 0, i32 2>  ; as floats: <val0, val1, acc0, acc1>
+  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
+                            <2 x i32> <i32 1, i32 3>  ; as floats: <val2, val3, acc2, acc3>
+  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
+  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
+  %add = fadd <4 x float> %f1, %f2
+  %call = call <4 x float> @ext(<4 x float> %add)  ; the call keeps its vector operand, so %add is expected to be rebuilt with insertelement
+  %cmp = fcmp ogt <4 x float> %call,
+                  <float 1.0, float 2.0, float 3.0, float 4.0>
+  %sel = select <4 x i1> %cmp, <4 x float> %call,
+                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
+  store <4 x float> %sel, <4 x float> *%ptr
+
+  %test = icmp eq i32 %nexti, 0
+  br i1 %test, label %loop, label %exit  ; as written, branches back to %loop when %nexti == 0
+
+exit:
+  ret void
+}
+
+define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {  ; integer counterpart of @f1: sext / add / icmp / select / trunc around <4 x i8> memory
+; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
+; CHECK: entry:
+; CHECK:   %init.i0 = extractelement <4 x i32> %init, i32 0
+; CHECK:   %init.i1 = extractelement <4 x i32> %init, i32 1
+; CHECK:   %init.i2 = extractelement <4 x i32> %init, i32 2
+; CHECK:   %init.i3 = extractelement <4 x i32> %init, i32 3
+; CHECK:   br label %loop
+; CHECK: loop:
+; CHECK:   %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+; CHECK:   %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
+; CHECK:   %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
+; CHECK:   %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
+; CHECK:   %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
+; CHECK:   %nexti = sub i32 %i, 1
+; CHECK:   %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i
+; CHECK:   %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
+; CHECK:   %val.i0 = load i8, i8* %ptr.i0, align 4
+; CHECK:   %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1
+; CHECK:   %val.i1 = load i8, i8* %ptr.i1, align 1
+; CHECK:   %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2
+; CHECK:   %val.i2 = load i8, i8* %ptr.i2, align 2
+; CHECK:   %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3
+; CHECK:   %val.i3 = load i8, i8* %ptr.i3, align 1
+; CHECK:   %ext.i0 = sext i8 %val.i0 to i32
+; CHECK:   %ext.i1 = sext i8 %val.i1 to i32
+; CHECK:   %ext.i2 = sext i8 %val.i2 to i32
+; CHECK:   %ext.i3 = sext i8 %val.i3 to i32
+; CHECK:   %add.i0 = add i32 %ext.i0, %acc.i0
+; CHECK:   %add.i1 = add i32 %ext.i1, %acc.i1
+; CHECK:   %add.i2 = add i32 %ext.i2, %acc.i2
+; CHECK:   %add.i3 = add i32 %ext.i3, %acc.i3
+; CHECK:   %cmp.i0 = icmp slt i32 %add.i0, -10
+; CHECK:   %cmp.i1 = icmp slt i32 %add.i1, -11
+; CHECK:   %cmp.i2 = icmp slt i32 %add.i2, -12
+; CHECK:   %cmp.i3 = icmp slt i32 %add.i3, -13
+; CHECK:   %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
+; CHECK:   %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
+; CHECK:   %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
+; CHECK:   %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
+; CHECK:   %trunc.i0 = trunc i32 %sel.i0 to i8
+; CHECK:   %trunc.i1 = trunc i32 %sel.i1 to i8
+; CHECK:   %trunc.i2 = trunc i32 %sel.i2 to i8
+; CHECK:   %trunc.i3 = trunc i32 %sel.i3 to i8
+; CHECK:   store i8 %trunc.i0, i8* %ptr.i0, align 4
+; CHECK:   store i8 %trunc.i1, i8* %ptr.i1, align 1
+; CHECK:   store i8 %trunc.i2, i8* %ptr.i2, align 2
+; CHECK:   store i8 %trunc.i3, i8* %ptr.i3, align 1
+; CHECK:   %test = icmp eq i32 %nexti, 0
+; CHECK:   br i1 %test, label %loop, label %exit
+; CHECK: exit:
+; CHECK:   ret void
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]  ; loop-carried vector value
+  %nexti = sub i32 %i, 1
+
+  %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
+  %val = load <4 x i8> , <4 x i8> *%ptr
+  %ext = sext <4 x i8> %val to <4 x i32>
+  %add = add <4 x i32> %ext, %acc
+  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
+  %single = insertelement <4 x i32> undef, i32 %i, i32 0
+  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
+                         <4 x i32> zeroinitializer  ; splat of %i; every expected scalar select uses plain %i
+  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
+  %trunc = trunc <4 x i32> %sel to <4 x i8>
+  store <4 x i8> %trunc, <4 x i8> *%ptr
+
+  %test = icmp eq i32 %nexti, 0
+  br i1 %test, label %loop, label %exit  ; as written, branches back to %loop when %nexti == 0
+
+exit:
+  ret void
+}
+
+; Check that !tbaa information is preserved: each scalarized load and store must carry the tag of its vector original.
+define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
+; CHECK-LABEL: @f3(
+; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
+; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]]
+; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]]
+; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]]
+; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]]
+; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]]
+; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]]
+; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]]
+; CHECK: ret void
+  %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1  ; load is tagged !1
+  %add = add <4 x i32> %val, %val
+  store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2  ; store is tagged !2, which is why the first store pattern captures TAG afresh
+  ret void
+}
+
+; Check that !tbaa.struct information is preserved on every scalarized load and store.
+define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
+; CHECK-LABEL: @f4(
+; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
+; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
+; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
+; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
+; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
+; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
+; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
+; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
+; CHECK: ret void
+  %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
+  %add = add <4 x i32> %val, %val
+  store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5  ; same node (!5) as the load, so one TAG capture serves loads and stores
+  ret void
+}
+
+; Check that llvm.access.group information is preserved on every scalarized load and store inside the loop.
+define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
+; CHECK-LABEL: @f5(
+; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.access.group ![[TAG:[0-9]*]]
+; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.access.group ![[TAG]]
+; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.access.group ![[TAG]]
+; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.access.group ![[TAG]]
+; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.access.group ![[TAG]]
+; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.access.group ![[TAG]]
+; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.access.group ![[TAG]]
+; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.access.group ![[TAG]]
+; CHECK: ret void
+entry:
+  br label %loop
+
+loop:
+  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
+  %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
+  %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
+  %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.access.group !13
+  %add = add <4 x i32> %val, %val
+  store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.access.group !13  ; load and store share access group !13
+  %next_index = add i32 %index, -1
+  %continue = icmp ne i32 %next_index, %count
+  br i1 %continue, label %loop, label %end, !llvm.loop !3  ; loop metadata !3 lists !13 under llvm.loop.parallel_accesses
+
+end:
+  ret void
+}
+
+; Check that fpmath information is preserved on each scalar fadd produced from the vector fadd.
+define <4 x float> @f6(<4 x float> %x) {
+; CHECK-LABEL: @f6(
+; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
+; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
+; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
+; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
+; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
+; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
+; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
+; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
+; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
+; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
+; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
+; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
+; CHECK: ret <4 x float> %res
+  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
+    !fpmath !4  ; the patterns tolerate any exponent suffix on the printed constants via {{[e+0]*}}
+  ret <4 x float> %res  ; returned as a vector, so the result is expected to be reassembled with insertelement
+}
+
+; Check that random metadata isn't kept: the unknown kind !foo must not appear anywhere in the output.
+define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
+; CHECK-LABEL: @f7(
+; CHECK-NOT: !foo
+; CHECK: ret void
+  %val = load <4 x i32> , <4 x i32> *%src, !foo !5
+  %add = add <4 x i32> %val, %val
+  store <4 x i32> %add, <4 x i32> *%dst, !foo !5
+  ret void
+}
+
+; Test GEP with vectors: a vector-of-pointers GEP is expected to become one scalar GEP per lane.
+define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
+                float *%other) {
+; CHECK-LABEL: @f8(
+; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
+; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
+; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
+; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
+; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
+; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
+; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
+; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
+; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
+; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
+; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
+; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
+; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
+; CHECK: store float* %val.i0, float** %dest.i0, align 32
+; CHECK: store float* %val.i1, float** %dest.i1, align 8
+; CHECK: store float* %val.i2, float** %dest.i2, align 16
+; CHECK: store float* %val.i3, float** %dest.i3, align 8
+; CHECK: ret void
+  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0  ; index lane 0 := 100
+  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2  ; index lane 2 := 100; lanes 1 and 3 still come from %i0
+  %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1  ; pointer lane 1 := %other
+  %val = getelementptr float, <4 x float *> %ptr1, <4 x i32> %i2  ; expected per lane: the inserted constants and %other are used directly
+  store <4 x float *> %val, <4 x float *> *%dest
+  ret void
+}
+
+; Test the handling of unaligned loads. The function name is matched with a label directive, like the other tests in this file.
+define void @f9(<4 x float> *%dest, <4 x float> *%src) {
+; CHECK-LABEL: @f9(
+; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
+; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
+; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
+; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
+; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
+; CHECK: %val.i0 = load float, float* %src.i0, align 4
+; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
+; CHECK: %val.i1 = load float, float* %src.i1, align 4
+; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
+; CHECK: %val.i2 = load float, float* %src.i2, align 4
+; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
+; CHECK: %val.i3 = load float, float* %src.i3, align 4
+; CHECK: store float %val.i0, float* %dest.i0, align 8
+; CHECK: store float %val.i1, float* %dest.i1, align 4
+; CHECK: store float %val.i2, float* %dest.i2, align 8
+; CHECK: store float %val.i3, float* %dest.i3, align 4
+; CHECK: ret void
+  %val = load <4 x float> , <4 x float> *%src, align 4  ; below the 16-byte vector alignment in the datalayout; every expected scalar load is align 4
+  store <4 x float> %val, <4 x float> *%dest, align 8  ; expected per-lane store alignments: 8, 4, 8, 4
+  ret void
+}
+
+; ...and again with subelement alignment (smaller than one float). The function name is matched with a label directive.
+define void @f10(<4 x float> *%dest, <4 x float> *%src) {
+; CHECK-LABEL: @f10(
+; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
+; CHECK: %dest.i1 = getelementptr float, float* %dest.i0, i32 1
+; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
+; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
+; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
+; CHECK: %val.i0 = load float, float* %src.i0, align 1
+; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
+; CHECK: %val.i1 = load float, float* %src.i1, align 1
+; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
+; CHECK: %val.i2 = load float, float* %src.i2, align 1
+; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
+; CHECK: %val.i3 = load float, float* %src.i3, align 1
+; CHECK: store float %val.i0, float* %dest.i0, align 2
+; CHECK: store float %val.i1, float* %dest.i1, align 2
+; CHECK: store float %val.i2, float* %dest.i2, align 2
+; CHECK: store float %val.i3, float* %dest.i3, align 2
+; CHECK: ret void
+  %val = load <4 x float> , <4 x float> *%src, align 1  ; every expected scalar load keeps align 1
+  store <4 x float> %val, <4 x float> *%dest, align 2  ; every expected scalar store keeps align 2
+  ret void
+}
+
+; Test that sub-byte loads aren't scalarized: the <32 x i1> loads and store must stay whole-vector. The function name is matched with a label directive.
+define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
+; CHECK-LABEL: @f11(
+; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0
+; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1
+; CHECK: store <32 x i1> %and, <32 x i1>* %dest
+; CHECK: ret void
+  %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
+  %val0 = load <32 x i1> , <32 x i1> *%src0
+  %val1 = load <32 x i1> , <32 x i1> *%src1
+  %and = and <32 x i1> %val0, %val1
+  store <32 x i1> %and, <32 x i1> *%dest
+  ret void
+}
+
+; Test that variable inserts aren't scalarized. The function name is matched with a label directive.
+define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
+; CHECK-LABEL: @f12(
+; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
+; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
+; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
+; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
+; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
+; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
+; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
+; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
+; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
+; CHECK: ret void
+  %val0 = load <4 x i32> , <4 x i32> *%src
+  %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index  ; non-constant lane index: expected to survive as a vector insertelement
+  %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1  ; still expected to be split per lane, reading lanes back with extractelement
+  store <4 x i32> %val2, <4 x i32> *%dest
+  ret void
+}
+
+; Test vector GEPs with more than one index: each lane is expected to get its own two-index scalar GEP.
+define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
+                 float *%other) {
+; CHECK-LABEL: @f13(
+; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
+; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
+; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
+; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
+; CHECK: %i.i0 = extractelement <4 x i32> %i, i32 0
+; CHECK: %ptr.i0 = extractelement <4 x [4 x float]*> %ptr, i32 0
+; CHECK: %val.i0 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i0, i32 0, i32 %i.i0
+; CHECK: %i.i1 = extractelement <4 x i32> %i, i32 1
+; CHECK: %ptr.i1 = extractelement <4 x [4 x float]*> %ptr, i32 1
+; CHECK: %val.i1 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i1, i32 1, i32 %i.i1
+; CHECK: %i.i2 = extractelement <4 x i32> %i, i32 2
+; CHECK: %ptr.i2 = extractelement <4 x [4 x float]*> %ptr, i32 2
+; CHECK: %val.i2 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i2, i32 2, i32 %i.i2
+; CHECK: %i.i3 = extractelement <4 x i32> %i, i32 3
+; CHECK: %ptr.i3 = extractelement <4 x [4 x float]*> %ptr, i32 3
+; CHECK: %val.i3 = getelementptr inbounds [4 x float], [4 x float]* %ptr.i3, i32 3, i32 %i.i3
+; CHECK: store float* %val.i0, float** %dest.i0, align 32
+; CHECK: store float* %val.i1, float** %dest.i1, align 8
+; CHECK: store float* %val.i2, float** %dest.i2, align 16
+; CHECK: store float* %val.i3, float** %dest.i3, align 8
+; CHECK: ret void
+  %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
+                                <4 x i32> <i32 0, i32 1, i32 2, i32 3>,  ; constant first index: lane N is expected to use "i32 N"
+                                <4 x i32> %i  ; variable second index, extracted per lane; %other is unused in this function
+  store <4 x float *> %val, <4 x float *> *%dest
+  ret void
+}
+
+; Test combinations of vector and non-vector PHIs in the same block.
+define <4 x float> @f14(<4 x float> %acc, i32 %count) {
+; CHECK-LABEL: @f14(
+; CHECK: %this_acc.i0 = phi float [ %acc.i0, %entry ], [ %next_acc.i0, %loop ]
+; CHECK: %this_acc.i1 = phi float [ %acc.i1, %entry ], [ %next_acc.i1, %loop ]
+; CHECK: %this_acc.i2 = phi float [ %acc.i2, %entry ], [ %next_acc.i2, %loop ]
+; CHECK: %this_acc.i3 = phi float [ %acc.i3, %entry ], [ %next_acc.i3, %loop ]
+; CHECK: %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
+; CHECK: %this_acc.upto0 = insertelement <4 x float> undef, float %this_acc.i0, i32 0
+; CHECK: %this_acc.upto1 = insertelement <4 x float> %this_acc.upto0, float %this_acc.i1, i32 1
+; CHECK: %this_acc.upto2 = insertelement <4 x float> %this_acc.upto1, float %this_acc.i2, i32 2
+; CHECK: %this_acc = insertelement <4 x float> %this_acc.upto2, float %this_acc.i3, i32 3
+; CHECK: ret <4 x float> %next_acc
+entry:
+  br label %loop
+
+loop:
+  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]  ; vector PHI: expected to become four float PHIs
+  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]  ; scalar PHI: expected after the float PHIs and before any insertelement
+  %foo = call <4 x float> @ext(<4 x float> %this_acc)  ; vector use of %this_acc, hence the expected insertelement chain
+  %next_acc = fadd <4 x float> %this_acc, %foo
+  %next_count = sub i32 %this_count, 1
+  %cmp = icmp eq i32 %next_count, 0
+  br i1 %cmp, label %loop, label %exit  ; as written, branches back to %loop when %next_count == 0
+
+exit:
+  ret <4 x float> %next_acc
+}
+
+!0 = !{ !"root" }
+!1 = !{ !"set1", !0 }
+!2 = !{ !"set2", !0 }
+!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
+!4 = !{ float 4.0 }
+!5 = !{ i64 0, i64 8, null }
+!13 = distinct !{}

Added: llvm/trunk/test/Transforms/Scalarizer/cache-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/cache-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/cache-bug.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/cache-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt -scalarizer -S < %s | FileCheck %s
+; RUN: opt -passes='function(scalarizer)' -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+
+; Check that vector element 1 is scalarized correctly from a chain of
+; insertelement instructions: the last value inserted (%inc) must feed the phi, not the earlier one (%x).
+define void @func(i32 %x) {
+; CHECK-LABEL: @func(
+; CHECK-NOT: phi i32 [ %x, %entry ], [ %inc.pos.y, %loop ]
+; CHECK:     phi i32 [ %inc, %entry ], [ %inc.pos.y, %loop ]
+; CHECK:   ret void
+entry:
+  %vecinit = insertelement <2 x i32> <i32 0, i32 0>, i32 %x, i32 1  ; element 1 := %x
+  %inc = add i32 %x, 1
+  %0 = insertelement <2 x i32> %vecinit, i32 %inc, i32 1  ; element 1 overwritten with %inc
+  br label %loop
+
+loop:
+  %pos = phi <2 x i32> [ %0, %entry ], [ %new.pos.y, %loop ]
+  %i = phi i32 [ 0, %entry ], [ %new.i, %loop ]
+  %pos.y = extractelement <2 x i32> %pos, i32 1
+  %inc.pos.y = add i32 %pos.y, 1
+  %new.pos.y = insertelement <2 x i32> %pos, i32 %inc.pos.y, i32 1
+  %new.i = add i32 %i, 1
+  %cmp2 = icmp slt i32 %new.i, 1
+  br i1 %cmp2, label %loop, label %exit
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/Scalarizer/crash-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/crash-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/crash-bug.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/crash-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt %s -scalarizer -S -o - | FileCheck %s
+; RUN: opt %s -passes='function(scalarizer)' -S -o - | FileCheck %s
+
+; Don't crash when a block (%bb2) uses a value whose defining block (%bb1) appears later in the function.
+
+define void @foo() {
+  br label %bb1
+
+bb2:                                        ; preds = %bb1
+  %bb2_vec = shufflevector <2 x i16> <i16 0, i16 10000>,  ; lane 0 is the constant 0
+                           <2 x i16> %bb1_vec,
+                           <2 x i32> <i32 0, i32 3>  ; lane 1 is lane 1 of %bb1_vec
+  br label %bb1
+
+bb1:                                        ; preds = %bb2, %0
+  %bb1_vec = phi <2 x i16> [ <i16 100, i16 200>, %0 ], [ %bb2_vec, %bb2 ]
+;CHECK: bb1:
+;CHECK: %bb1_vec.i0 = phi i16 [ 100, %0 ], [ 0, %bb2 ]
+;CHECK: %bb2_vec.i1 = phi i16 [ 200, %0 ], [ %bb2_vec.i1, %bb2 ]
+  br i1 undef, label %bb3, label %bb2
+
+bb3:
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/Scalarizer/dbginfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/dbginfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/dbginfo.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/dbginfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,86 @@
+; RUN: opt %s -scalarizer -scalarize-load-store -S | FileCheck %s
+; RUN: opt %s -passes='function(scalarizer)' -scalarize-load-store -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable -- checks that !dbg and !tbaa survive scalarization while the llvm.dbg.value calls keep their vector-pointer operands
+define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x i32>* nocapture readonly %c) #0 !dbg !4 {
+; CHECK: @f1(
+; CHECK: %a.i0 = bitcast <4 x i32>* %a to i32*
+; CHECK: %a.i1 = getelementptr i32, i32* %a.i0, i32 1
+; CHECK: %a.i2 = getelementptr i32, i32* %a.i0, i32 2
+; CHECK: %a.i3 = getelementptr i32, i32* %a.i0, i32 3
+; CHECK: %c.i0 = bitcast <4 x i32>* %c to i32*
+; CHECK: %c.i1 = getelementptr i32, i32* %c.i0, i32 1
+; CHECK: %c.i2 = getelementptr i32, i32* %c.i0, i32 2
+; CHECK: %c.i3 = getelementptr i32, i32* %c.i0, i32 3
+; CHECK: %b.i0 = bitcast <4 x i32>* %b to i32*
+; CHECK: %b.i1 = getelementptr i32, i32* %b.i0, i32 1
+; CHECK: %b.i2 = getelementptr i32, i32* %b.i0, i32 2
+; CHECK: %b.i3 = getelementptr i32, i32* %b.i0, i32 3
+; CHECK: tail call void @llvm.dbg.value(metadata <4 x i32>* %a, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata <4 x i32>* %b, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata <4 x i32>* %c, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
+; CHECK: %bval.i0 = load i32, i32* %b.i0, align 16, !dbg ![[TAG1:[0-9]+]], !tbaa ![[TAG2:[0-9]+]]
+; CHECK: %bval.i1 = load i32, i32* %b.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %bval.i2 = load i32, i32* %b.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %bval.i3 = load i32, i32* %b.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i0 = load i32, i32* %c.i0, align 16, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i1 = load i32, i32* %c.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i2 = load i32, i32* %c.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i3 = load i32, i32* %c.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %add.i0 = add i32 %bval.i0, %cval.i0, !dbg ![[TAG1]]
+; CHECK: %add.i1 = add i32 %bval.i1, %cval.i1, !dbg ![[TAG1]]
+; CHECK: %add.i2 = add i32 %bval.i2, %cval.i2, !dbg ![[TAG1]]
+; CHECK: %add.i3 = add i32 %bval.i3, %cval.i3, !dbg ![[TAG1]]
+; CHECK: store i32 %add.i0, i32* %a.i0, align 16, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: store i32 %add.i1, i32* %a.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: store i32 %add.i2, i32* %a.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: store i32 %add.i3, i32* %a.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: ret void
+entry:
+  tail call void @llvm.dbg.value(metadata <4 x i32>* %a, metadata !15, metadata !DIExpression()), !dbg !20
+  tail call void @llvm.dbg.value(metadata <4 x i32>* %b, metadata !16, metadata !DIExpression()), !dbg !20
+  tail call void @llvm.dbg.value(metadata <4 x i32>* %c, metadata !17, metadata !DIExpression()), !dbg !20
+  %bval = load <4 x i32>, <4 x i32>* %b, align 16, !dbg !21, !tbaa !22  ; loads, add and store all share location !21 and (where present) tbaa !22
+  %cval = load <4 x i32>, <4 x i32>* %c, align 16, !dbg !21, !tbaa !22
+  %add = add <4 x i32> %bval, %cval, !dbg !21
+  store <4 x i32> %add, <4 x i32>* %a, align 16, !dbg !21, !tbaa !22  ; align 16 on the vector; expected per-lane alignments are 16, 4, 8, 4
+  ret void, !dbg !25
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18, !26}
+!llvm.ident = !{!19}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 194134) (llvm/trunk 194126)", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "f1", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 4, file: !1, scope: !5, type: !6, retainedNodes: !14)
+!5 = !DIFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null, !8, !8, !8}
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
+!9 = !DIDerivedType(tag: DW_TAG_typedef, name: "V4SI", line: 1, file: !1, baseType: !10)
+!10 = !DICompositeType(tag: DW_TAG_array_type, size: 128, align: 128, flags: DIFlagVector, baseType: !11, elements: !12)
+!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!12 = !{!13}
+!13 = !DISubrange(count: 4)
+!14 = !{!15, !16, !17}
+!15 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(name: "b", line: 3, arg: 2, scope: !4, file: !5, type: !8)
+!17 = !DILocalVariable(name: "c", line: 3, arg: 3, scope: !4, file: !5, type: !8)
+!18 = !{i32 2, !"Dwarf Version", i32 4}
+!19 = !{!"clang version 3.4 (trunk 194134) (llvm/trunk 194126)"}
+!20 = !DILocation(line: 3, scope: !4)
+!21 = !DILocation(line: 5, scope: !4)
+!22 = !{!23, !23, i64 0}
+!23 = !{!"omnipotent char", !24, i64 0}
+!24 = !{!"Simple C/C++ TBAA"}
+!25 = !DILocation(line: 6, scope: !4)
+!26 = !{i32 1, !"Debug Info Version", i32 3}

Added: llvm/trunk/test/Transforms/Scalarizer/dbgloc-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/dbgloc-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/dbgloc-bug.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/dbgloc-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt -S -march=x86 -scalarizer %s | FileCheck %s
+; RUN: opt -S -march=x86 -passes='function(scalarizer)' %s | FileCheck %s
+
+; Reproducer for pr27938
+; https://llvm.org/bugs/show_bug.cgi?id=27938
+
+define i16 @f1() !dbg !5 { ; trivial callee for @f2; attached to subprogram !5
+  ret i16 undef, !dbg !9
+}
+
+define void @f2() !dbg !10 { ; the !dbg attachment on the call to @f1 must still be present after scalarization
+bb1:
+  %_tmp7 = tail call i16 @f1(), !dbg !13
+; CHECK: call i16 @f1(), !dbg !13
+  %broadcast.splatinsert5 = insertelement <4 x i16> undef, i16 %_tmp7, i32 0 ; call result goes into lane 0
+  %broadcast.splat6 = shufflevector <4 x i16> %broadcast.splatinsert5, <4 x i16> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane takes lane 0
+  br label %vector.body
+
+vector.body: ; branches back to itself or on to %middle.block, on an undef condition
+  br i1 undef, label %middle.block, label %vector.body
+
+middle.block:
+  ret void, !dbg !15
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, retainedTypes: !2)
+!1 = !DIFile(filename: "dbgloc-bug.c", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "f1", scope: !1, file: !1, line: 9, type: !6, isLocal: false, isDefinition: true, scopeLine: 10, isOptimized: true, unit: !0, retainedNodes: !2)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(name: "short", size: 16, align: 16, encoding: DW_ATE_signed)
+!9 = !DILocation(line: 11, column: 5, scope: !5)
+!10 = distinct !DISubprogram(name: "f2", scope: !1, file: !1, line: 14, type: !11, isLocal: false, isDefinition: true, scopeLine: 15, isOptimized: true, unit: !0, retainedNodes: !2)
+!11 = !DISubroutineType(types: !12)
+!12 = !{null}
+!13 = !DILocation(line: 24, column: 9, scope: !14)
+!14 = !DILexicalBlock(scope: !10, file: !1, line: 17, column: 5)
+!15 = !DILocation(line: 28, column: 1, scope: !10)

Added: llvm/trunk/test/Transforms/Scalarizer/intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/intrinsics.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/intrinsics.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,110 @@
+; RUN: opt -S -scalarizer %s | FileCheck %s
+; RUN: opt -S -passes='function(scalarizer)' %s | FileCheck %s
+
+; Unary fp
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
+
+; Binary fp
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
+declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
+declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
+
+; Ternary fp
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
+
+; Unary int
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
+
+; Unary int plus constant scalar operand
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+
+; Unary fp plus any scalar operand
+declare <2 x float> @llvm.powi.v2f32(<2 x float>, i32)
+
+; CHECK-LABEL: @scalarize_sqrt_v2f32(
+; CHECK: %sqrt.i0 = call float @llvm.sqrt.f32(float %x.i0)
+; CHECK: %sqrt.i1 = call float @llvm.sqrt.f32(float %x.i1)
+; CHECK: %sqrt.upto0 = insertelement <2 x float> undef, float %sqrt.i0, i32 0
+; CHECK: %sqrt = insertelement <2 x float> %sqrt.upto0, float %sqrt.i1, i32 1
+; CHECK: ret <2 x float> %sqrt
+define <2 x float> @scalarize_sqrt_v2f32(<2 x float> %x) #0 { ; unary fp case: expected to become one scalar llvm.sqrt.f32 call per lane
+  %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
+  ret <2 x float> %sqrt ; the expected output rebuilds %sqrt from the two scalar results with insertelement
+}
+
+; CHECK-LABEL: @scalarize_minnum_v2f32(
+; CHECK: %minnum.i0 = call float @llvm.minnum.f32(float %x.i0, float %y.i0)
+; CHECK: %minnum.i1 = call float @llvm.minnum.f32(float %x.i1, float %y.i1)
+; CHECK: %minnum.upto0 = insertelement <2 x float> undef, float %minnum.i0, i32 0
+; CHECK: %minnum = insertelement <2 x float> %minnum.upto0, float %minnum.i1, i32 1
+; CHECK: ret <2 x float> %minnum
+define <2 x float> @scalarize_minnum_v2f32(<2 x float> %x, <2 x float> %y) #0 { ; binary fp case: lane N of %x is expected to be paired with lane N of %y
+  %minnum = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y)
+  ret <2 x float> %minnum
+}
+
+; CHECK-LABEL: @scalarize_minimum_v2f32(
+; CHECK: %minimum.i0 = call float @llvm.minimum.f32(float %x.i0, float %y.i0)
+; CHECK: %minimum.i1 = call float @llvm.minimum.f32(float %x.i1, float %y.i1)
+; CHECK: %minimum.upto0 = insertelement <2 x float> undef, float %minimum.i0, i32 0
+; CHECK: %minimum = insertelement <2 x float> %minimum.upto0, float %minimum.i1, i32 1
+; CHECK: ret <2 x float> %minimum
+define <2 x float> @scalarize_minimum_v2f32(<2 x float> %x, <2 x float> %y) #0 { ; binary fp case using llvm.minimum: one scalar call per lane is expected
+  %minimum = call <2 x float> @llvm.minimum.v2f32(<2 x float> %x, <2 x float> %y)
+  ret <2 x float> %minimum
+}
+
+; CHECK-LABEL: @scalarize_maximum_v2f32(
+; CHECK: %maximum.i0 = call float @llvm.maximum.f32(float %x.i0, float %y.i0)
+; CHECK: %maximum.i1 = call float @llvm.maximum.f32(float %x.i1, float %y.i1)
+; CHECK: %maximum.upto0 = insertelement <2 x float> undef, float %maximum.i0, i32 0
+; CHECK: %maximum = insertelement <2 x float> %maximum.upto0, float %maximum.i1, i32 1
+; CHECK: ret <2 x float> %maximum
+define <2 x float> @scalarize_maximum_v2f32(<2 x float> %x, <2 x float> %y) #0 { ; binary fp case using llvm.maximum: one scalar call per lane is expected
+  %maximum = call <2 x float> @llvm.maximum.v2f32(<2 x float> %x, <2 x float> %y)
+  ret <2 x float> %maximum
+}
+
+; CHECK-LABEL: @scalarize_fma_v2f32(
+; CHECK: %fma.i0 = call float @llvm.fma.f32(float %x.i0, float %y.i0, float %z.i0)
+; CHECK: %fma.i1 = call float @llvm.fma.f32(float %x.i1, float %y.i1, float %z.i1)
+; CHECK: %fma.upto0 = insertelement <2 x float> undef, float %fma.i0, i32 0
+; CHECK: %fma = insertelement <2 x float> %fma.upto0, float %fma.i1, i32 1
+; CHECK: ret <2 x float> %fma
+define <2 x float> @scalarize_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 { ; ternary fp case: each scalar call is expected to take lane N of all three operands
+  %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
+  ret <2 x float> %fma
+}
+
+; CHECK-LABEL: @scalarize_bswap_v2i32(
+; CHECK: %bswap.i0 = call i32 @llvm.bswap.i32(i32 %x.i0)
+; CHECK: %bswap.i1 = call i32 @llvm.bswap.i32(i32 %x.i1)
+; CHECK: %bswap.upto0 = insertelement <2 x i32> undef, i32 %bswap.i0, i32 0
+; CHECK: %bswap = insertelement <2 x i32> %bswap.upto0, i32 %bswap.i1, i32 1
+; CHECK: ret <2 x i32> %bswap
+define <2 x i32> @scalarize_bswap_v2i32(<2 x i32> %x) #0 { ; integer case with a single vector operand: one llvm.bswap.i32 call per lane is expected
+  %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %x)
+  ret <2 x i32> %bswap
+}
+
+; CHECK-LABEL: @scalarize_ctlz_v2i32(
+; CHECK: %ctlz.i0 = call i32 @llvm.ctlz.i32(i32 %x.i0, i1 true)
+; CHECK: %ctlz.i1 = call i32 @llvm.ctlz.i32(i32 %x.i1, i1 true)
+; CHECK: %ctlz.upto0 = insertelement <2 x i32> undef, i32 %ctlz.i0, i32 0
+; CHECK: %ctlz = insertelement <2 x i32> %ctlz.upto0, i32 %ctlz.i1, i32 1
+; CHECK: ret <2 x i32> %ctlz
+define <2 x i32> @scalarize_ctlz_v2i32(<2 x i32> %x) #0 { ; vector operand plus a constant i1 flag
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true) ; the `i1 true` operand is expected to appear unchanged in both scalar calls
+  ret <2 x i32> %ctlz
+}
+
+; CHECK-LABEL: @scalarize_powi_v2f32(
+; CHECK: %powi.i0 = call float @llvm.powi.f32(float %x.i0, i32 %y)
+; CHECK: %powi.i1 = call float @llvm.powi.f32(float %x.i1, i32 %y)
+; CHECK: %powi.upto0 = insertelement <2 x float> undef, float %powi.i0, i32 0
+; CHECK: %powi = insertelement <2 x float> %powi.upto0, float %powi.i1, i32 1
+; CHECK: ret <2 x float> %powi
+define <2 x float> @scalarize_powi_v2f32(<2 x float> %x, i32 %y) #0 { ; vector operand plus a non-constant scalar exponent
+  %powi = call <2 x float> @llvm.powi.v2f32(<2 x float> %x, i32 %y) ; scalar %y is expected to be passed as-is to both per-lane calls
+  ret <2 x float> %powi
+}

Added: llvm/trunk/test/Transforms/Scalarizer/order-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/order-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/order-bug.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/order-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt %s -scalarizer -S -o - | FileCheck %s
+; RUN: opt %s -passes='function(scalarizer)' -S -o - | FileCheck %s
+
+; This input caused the scalarizer to replace & erase gathered results when 
+; future gathered results depended on them being alive
+
+define dllexport spir_func <4 x i32> @main(float %a) { ; control flow is entry -> z -> y, although y is laid out before z
+entry:
+  %i = insertelement <4 x float> undef, float %a, i32 0
+  br label %z
+
+y: ; uses %b, which is defined in the textually later block z
+; CHECK: %f.upto0 = insertelement <4 x i32> undef, i32 %b.i0, i32 0
+; CHECK: %f.upto1 = insertelement <4 x i32> %f.upto0, i32 %b.i0, i32 1
+; CHECK: %f.upto2 = insertelement <4 x i32> %f.upto1, i32 %b.i0, i32 2
+; CHECK: %f = insertelement <4 x i32> %f.upto2, i32 %b.i0, i32 3
+  %f = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane of %f is lane 0 of %b
+  ret <4 x i32> %f
+
+z:
+; CHECK: %b.i0 = bitcast float %a to i32
+  %b = bitcast <4 x float> %i to <4 x i32>
+  br label %y
+}

Added: llvm/trunk/test/Transforms/Scalarizer/phi-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/phi-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/phi-bug.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/phi-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt %s -scalarizer -verify -S -o - | FileCheck %s
+; RUN: opt %s -passes='function(scalarizer,verify)' -S -o - | FileCheck %s
+
+define void @f3() local_unnamed_addr { ; executes bb1 -> bb2 -> bb3 -> bb4; the blocks are listed out of that order
+bb1:
+  br label %bb2
+
+bb3:
+; CHECK-LABEL: bb3:
+; CHECK-NEXT: br label %bb4
+  %h.10.0.vec.insert = shufflevector <1 x i16> %h.10.1, <1 x i16> undef, <1 x i32> <i32 0> ; single-lane shuffle; the expected output has only the branch left in bb3
+  br label %bb4
+
+bb2:
+; CHECK-LABEL: bb2:
+; CHECK: phi i16
+  %h.10.1 = phi <1 x i16> [ undef, %bb1 ] ; one-element vector phi, expected to become a scalar i16 phi
+  br label %bb3
+
+bb4:
+; CHECK-LABEL: bb4:
+; CHECK: phi i16
+  %h.10.2 = phi <1 x i16> [ %h.10.0.vec.insert, %bb3 ] ; consumes the shuffle result from bb3; also expected as an i16 phi
+  ret void
+}

Added: llvm/trunk/test/Transforms/Scalarizer/store-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/store-bug.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/store-bug.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/store-bug.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt -scalarizer -scalarize-load-store -S < %s | FileCheck %s
+; RUN: opt -passes='function(scalarizer)' -scalarize-load-store -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; This input caused the scalarizer not to clear cached results
+; properly.
+;
+; Any regressions should trigger an assert in the scalarizer.
+
+define void @func(<4 x float> %val, <4 x float> *%ptr) { ; one vector store, expected to be split into four float stores
+  store <4 x float> %val, <4 x float> *%ptr ; no explicit align here; the expected 16/4/8/4 alignments presumably follow from v128:128:128 in the datalayout and the 4-byte lane offsets (confirm)
+  ret void
+; CHECK: store float %val.i0, float* %ptr.i0, align 16
+; CHECK: store float %val.i1, float* %ptr.i1, align 4
+; CHECK: store float %val.i2, float* %ptr.i2, align 8
+; CHECK: store float %val.i3, float* %ptr.i3, align 4
+}
+
+define void @func.copy(<4 x float> %val, <4 x float> *%ptr) { ; same body and expectations as @func; presumably this second function is what exposes stale cached results (confirm)
+  store <4 x float> %val, <4 x float> *%ptr
+  ret void
+; CHECK: store float %val.i0, float* %ptr.i0, align 16
+; CHECK: store float %val.i1, float* %ptr.i1, align 4
+; CHECK: store float %val.i2, float* %ptr.i2, align 8
+; CHECK: store float %val.i3, float* %ptr.i3, align 4
+}

Added: llvm/trunk/test/Transforms/Scalarizer/vector-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Scalarizer/vector-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Scalarizer/vector-gep.ll (added)
+++ llvm/trunk/test/Transforms/Scalarizer/vector-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,123 @@
+; RUN: opt -S -scalarizer %s | FileCheck %s
+; RUN: opt -S -passes='function(scalarizer)' %s | FileCheck %s
+
+; Check that the scalarizer can handle vector GEPs with scalar indices
+
+@vec = global <4 x i16*> <i16* null, i16* null, i16* null, i16* null> ; vector of pointers loaded by @test1, @test2 and @test5
+@index = global i16 1 ; scalar GEP index loaded by @test2
+@ptr = global [4 x i16] [i16 1, i16 2, i16 3, i16 4] ; array bitcast to a scalar base pointer in @test3
+@ptrptr = global i16* null ; holds the scalar base pointer loaded by @test4
+
+; constant index
+define void @test1() { ; vector of pointers indexed by a scalar constant
+bb:
+  %0 = load <4 x i16*>, <4 x i16*>* @vec
+  %1 = getelementptr i16, <4 x i16*> %0, i16 1 ; %1 has no users; the checks below only look at the per-lane GEPs
+
+  ret void
+}
+
+;CHECK-LABEL: @test1
+;CHECK: %[[I0:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 0
+;CHECK: getelementptr i16, i16* %[[I0]], i16 1
+;CHECK: %[[I1:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 1
+;CHECK: getelementptr i16, i16* %[[I1]], i16 1
+;CHECK: %[[I2:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 2
+;CHECK: getelementptr i16, i16* %[[I2]], i16 1
+;CHECK: %[[I3:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 3
+;CHECK: getelementptr i16, i16* %[[I3]], i16 1
+
+; non-constant index
+define void @test2() { ; vector of pointers indexed by a scalar loaded at run time
+bb:
+  %0 = load <4 x i16*>, <4 x i16*>* @vec
+  %index = load i16, i16* @index
+  %1 = getelementptr i16, <4 x i16*> %0, i16 %index ; expected output splats %index into a <4 x i16> before extracting per-lane indices
+
+  ret void
+}
+
+;CHECK-LABEL: @test2
+;CHECK: %0 = load <4 x i16*>, <4 x i16*>* @vec
+;CHECK: %[[I0:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 0
+;CHECK: %[[I1:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 1
+;CHECK: %[[I2:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 2
+;CHECK: %[[I3:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 3
+;CHECK: %index = load i16, i16* @index
+;CHECK: %.splatinsert = insertelement <4 x i16> undef, i16 %index, i32 0
+;CHECK: %.splat = shufflevector <4 x i16> %.splatinsert, <4 x i16> undef, <4 x i32> zeroinitializer
+;CHECK: %.splat[[I0]] = extractelement <4 x i16> %.splat, i32 0
+;CHECK: getelementptr i16, i16* %[[I0]], i16 %.splat[[I0]]
+;CHECK: %.splat[[I1]] = extractelement <4 x i16> %.splat, i32 1
+;CHECK: getelementptr i16, i16* %[[I1]], i16 %.splat[[I1]]
+;CHECK: %.splat[[I2]] = extractelement <4 x i16> %.splat, i32 2
+;CHECK: getelementptr i16, i16* %[[I2]], i16 %.splat[[I2]]
+;CHECK: %.splat[[I3]] = extractelement <4 x i16> %.splat, i32 3
+;CHECK: getelementptr i16, i16* %[[I3]], i16 %.splat[[I3]]
+
+
+; Check that the scalarizer can handle vector GEPs with scalar pointer
+
+; constant pointer
+define void @test3() { ; scalar base pointer (derived from @ptr) with a vector of constant indices
+bb:
+  %0 = bitcast [4 x i16]* @ptr to i16*
+  %1 = getelementptr i16, i16* %0, <4 x i16> <i16 0, i16 1, i16 2, i16 3> ; expected output splats %0 and emits one GEP per index 0..3
+
+  ret void
+}
+
+;CHECK-LABEL: @test3
+;CHECK: %0 = bitcast [4 x i16]* @ptr to i16*
+;CHECK: %.splatinsert = insertelement <4 x i16*> undef, i16* %0, i32 0
+;CHECK: %.splat = shufflevector <4 x i16*> %.splatinsert, <4 x i16*> undef, <4 x i32> zeroinitializer
+;CHECK: %.splat[[I0:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 0
+;CHECK: getelementptr i16, i16* %.splat[[I0]], i16 0
+;CHECK: %.splat[[I1:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 1
+;CHECK: getelementptr i16, i16* %.splat[[I1]], i16 1
+;CHECK: %.splat[[I2:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 2
+;CHECK: getelementptr i16, i16* %.splat[[I2]], i16 2
+;CHECK: %.splat[[I3:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 3
+;CHECK: getelementptr i16, i16* %.splat[[I3]], i16 3
+
+; non-constant pointer
+define void @test4() { ; like @test3, but the scalar base pointer is loaded from @ptrptr
+bb:
+  %0 = load i16*, i16** @ptrptr
+  %1 = getelementptr i16, i16* %0, <4 x i16> <i16 0, i16 1, i16 2, i16 3> ; expected output splats %0 and emits one GEP per index 0..3
+
+  ret void
+}
+
+;CHECK-LABEL: @test4
+;CHECK: %0 = load i16*, i16** @ptrptr
+;CHECK: %.splatinsert = insertelement <4 x i16*> undef, i16* %0, i32 0
+;CHECK: %.splat = shufflevector <4 x i16*> %.splatinsert, <4 x i16*> undef, <4 x i32> zeroinitializer
+;CHECK: %.splat[[I0:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 0
+;CHECK: getelementptr i16, i16* %.splat[[I0]], i16 0
+;CHECK: %.splat[[I1:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 1
+;CHECK: getelementptr i16, i16* %.splat[[I1]], i16 1
+;CHECK: %.splat[[I2:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 2
+;CHECK: getelementptr i16, i16* %.splat[[I2]], i16 2
+;CHECK: %.splat[[I3:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 3
+;CHECK: getelementptr i16, i16* %.splat[[I3]], i16 3
+
+; constant index, inbounds
+define void @test5() { ; same shape as @test1, with the inbounds flag on the GEP
+bb:
+  %0 = load <4 x i16*>, <4 x i16*>* @vec
+  %1 = getelementptr inbounds i16, <4 x i16*> %0, i16 1 ; every per-lane GEP in the expected output must keep `inbounds`
+
+  ret void
+}
+
+;CHECK-LABEL: @test5
+;CHECK: %[[I0:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 0
+;CHECK: getelementptr inbounds i16, i16* %[[I0]], i16 1
+;CHECK: %[[I1:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 1
+;CHECK: getelementptr inbounds i16, i16* %[[I1]], i16 1
+;CHECK: %[[I2:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 2
+;CHECK: getelementptr inbounds i16, i16* %[[I2]], i16 1
+;CHECK: %[[I3:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 3
+;CHECK: getelementptr inbounds i16, i16* %[[I3]], i16 1
+

Added: llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll (added)
+++ llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,139 @@
+; RUN: opt -mtriple=amdgcn-- -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn < %s | FileCheck -check-prefix=IR %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+@array = internal addrspace(4) constant [4096 x [32 x float]] zeroinitializer, align 4 ; 4096 rows of 32 floats; indexed by @sum_of_array
+
+; IR-LABEL: @sum_of_array(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 33
+define amdgpu_kernel void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { ; sums four neighbouring elements of @array and stores the result to %output
+  %tmp = sext i32 %y to i64
+  %tmp1 = sext i32 %x to i64
+  %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp1, i64 %tmp ; &array[x][y]: the base pointer in the expected output
+  %tmp4 = load float, float addrspace(4)* %tmp2, align 4
+  %tmp5 = fadd float %tmp4, 0.000000e+00
+  %tmp6 = add i32 %y, 1 ; add is done in i32, then sign-extended
+  %tmp7 = sext i32 %tmp6 to i64
+  %tmp8 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp1, i64 %tmp7 ; &array[x][y+1]: expected as base + 1
+  %tmp10 = load float, float addrspace(4)* %tmp8, align 4
+  %tmp11 = fadd float %tmp5, %tmp10
+  %tmp12 = add i32 %x, 1
+  %tmp13 = sext i32 %tmp12 to i64
+  %tmp14 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp13, i64 %tmp ; &array[x+1][y]: expected as base + 32 (one row of 32 floats)
+  %tmp16 = load float, float addrspace(4)* %tmp14, align 4
+  %tmp17 = fadd float %tmp11, %tmp16
+  %tmp18 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(4)* @array, i64 0, i64 %tmp13, i64 %tmp7 ; &array[x+1][y+1]: expected as base + 33
+  %tmp20 = load float, float addrspace(4)* %tmp18, align 4
+  %tmp21 = fadd float %tmp17, %tmp20
+  store float %tmp21, float addrspace(1)* %output, align 4
+  ret void
+}
+
+@array2 = internal addrspace(4) constant [4096 x [4 x float]] zeroinitializer, align 4 ; 4096 rows of 4 floats; indexed by @sum_of_array_over_max_mubuf_offset
+
+; Some of the indices go over the maximum mubuf offset, so don't split them.
+
+; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds float, float addrspace(4)* [[BASE_PTR]], i64 255
+; IR: add i32 %x, 256
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+define amdgpu_kernel void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { ; same four-element sum as @sum_of_array, with larger constant offsets (y+255, x+256) on @array2
+  %tmp = sext i32 %y to i64
+  %tmp1 = sext i32 %x to i64
+  %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp1, i64 %tmp ; &array2[x][y]: the base pointer in the expected output
+  %tmp4 = load float, float addrspace(4)* %tmp2, align 4
+  %tmp5 = fadd float %tmp4, 0.000000e+00
+  %tmp6 = add i32 %y, 255
+  %tmp7 = sext i32 %tmp6 to i64
+  %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp1, i64 %tmp7 ; &array2[x][y+255]: still expected to be split, as base + 255
+  %tmp10 = load float, float addrspace(4)* %tmp8, align 4
+  %tmp11 = fadd float %tmp5, %tmp10
+  %tmp12 = add i32 %x, 256 ; 256 rows of 4 floats; this add is expected to survive in the output
+  %tmp13 = sext i32 %tmp12 to i64
+  %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp13, i64 %tmp ; &array2[x+256][y]: expected to stay a full, unsplit GEP
+  %tmp16 = load float, float addrspace(4)* %tmp14, align 4
+  %tmp17 = fadd float %tmp11, %tmp16
+  %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(4)* @array2, i64 0, i64 %tmp13, i64 %tmp7 ; &array2[x+256][y+255]: expected to stay a full, unsplit GEP
+  %tmp20 = load float, float addrspace(4)* %tmp18, align 4
+  %tmp21 = fadd float %tmp17, %tmp20
+  store float %tmp21, float addrspace(1)* %output, align 4
+  ret void
+}
+
+
+@lds_array = internal addrspace(3) global [4096 x [4 x float]] undef, align 4 ; 4096 rows of 4 floats in addrspace(3); indexed by @sum_of_lds_array_over_max_mubuf_offset
+
+; DS instructions have a larger immediate offset, so make sure these are OK.
+; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383
+define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { ; four-element sum over @lds_array (addrspace(3)) using i32 indices directly, with no sext
+  %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y ; &lds_array[x][y]: the base pointer in the expected output
+  %tmp4 = load float, float addrspace(3)* %tmp2, align 4
+  %tmp5 = fadd float %tmp4, 0.000000e+00
+  %tmp6 = add i32 %y, 255
+  %tmp8 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %tmp6 ; &lds_array[x][y+255]: expected as base + 255
+  %tmp10 = load float, float addrspace(3)* %tmp8, align 4
+  %tmp11 = fadd float %tmp5, %tmp10
+  %tmp12 = add i32 %x, 4032 ; 4032 rows * 4 floats = 16128 floats
+  %tmp14 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %y ; &lds_array[x+4032][y]: expected as base + 16128
+  %tmp16 = load float, float addrspace(3)* %tmp14, align 4
+  %tmp17 = fadd float %tmp11, %tmp16
+  %tmp18 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %tmp12, i32 %tmp6 ; &lds_array[x+4032][y+255]: expected as base + 16383 (16128 + 255)
+  %tmp20 = load float, float addrspace(3)* %tmp18, align 4
+  %tmp21 = fadd float %tmp17, %tmp20
+  store float %tmp21, float addrspace(1)* %output, align 4
+  ret void
+}
+
+; IR-LABEL: @keep_metadata(
+; IR: getelementptr {{.*}} !amdgpu.uniform
+; IR: getelementptr {{.*}} !amdgpu.uniform
+; IR: getelementptr {{.*}} !amdgpu.uniform
+define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(4)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
+main_body: ; the 22 unnamed arguments are %0..%21, so instruction numbering starts at %22; only %1, %4, %5 and %20 are used
+  %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
+  %23 = bitcast float %22 to i32
+  %24 = shl i32 %23, 1
+  %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(4)* %1, i32 0, i32 %24, !amdgpu.uniform !0 ; !amdgpu.uniform must still be attached in the output
+  %26 = load <8 x i32>, <8 x i32> addrspace(4)* %25, align 32, !invariant.load !0
+  %27 = shl i32 %23, 2
+  %28 = or i32 %27, 3 ; presumably treated as a constant offset of 3, giving the third expected getelementptr (confirm)
+  %29 = bitcast [0 x <8 x i32>] addrspace(4)* %1 to [0 x <4 x i32>] addrspace(4)*
+  %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(4)* %29, i32 0, i32 %28, !amdgpu.uniform !0 ; !amdgpu.uniform must still be attached in the output
+  %31 = load <4 x i32>, <4 x i32> addrspace(4)* %30, align 16, !invariant.load !0
+  %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
+  %33 = extractelement <4 x float> %32, i32 0
+  %34 = extractelement <4 x float> %32, i32 1
+  %35 = extractelement <4 x float> %32, i32 2
+  %36 = extractelement <4 x float> %32, i32 3
+  %37 = bitcast float %4 to i32
+  %38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %37, 4
+  %39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 5
+  %40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 6
+  %41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 7
+  %42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %36, 8
+  %43 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %20, 19
+  ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
+
+; Function Attrs: nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7
+
+
+!0 = !{}
+
+attributes #5 = { "InitialPSInputAddr"="45175" }
+attributes #6 = { nounwind readnone speculatable }
+attributes #7 = { nounwind readonly }
+attributes #8 = { nounwind readnone }

Added: llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/lit.local.cfg Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3 @@
+if not 'NVPTX' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll (added)
+++ llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,236 @@
+; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_20 \
+; RUN:     | FileCheck %s --check-prefix=PTX
+; RUN: opt < %s -mtriple=nvptx64-nvidia-cuda -S -separate-const-offset-from-gep \
+; RUN:       -reassociate-geps-verify-no-dead-code -gvn \
+; RUN:     | FileCheck %s --check-prefix=IR
+
+; Verifies the SeparateConstOffsetFromGEP pass.
+; The following code computes
+; *output = array[x][y] + array[x][y+1] + array[x+1][y] + array[x+1][y+1]
+;
+; We expect SeparateConstOffsetFromGEP to transform it to
+;
+; float *base = &array[x][y];
+; *output = base[0] + base[1] + base[32] + base[33];
+;
+; so the backend can emit PTX that uses fewer virtual registers.
+
+@array = internal addrspace(3) constant [32 x [32 x float]] zeroinitializer, align 4
+
+define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
+  %0 = sext i32 %y to i64
+  %1 = sext i32 %x to i64
+  %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+  %3 = addrspacecast float addrspace(3)* %2 to float*
+  %4 = load float, float* %3, align 4
+  %5 = fadd float %4, 0.000000e+00
+  %6 = add i32 %y, 1
+  %7 = sext i32 %6 to i64
+  %8 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %7
+  %9 = addrspacecast float addrspace(3)* %8 to float*
+  %10 = load float, float* %9, align 4
+  %11 = fadd float %5, %10
+  %12 = add i32 %x, 1
+  %13 = sext i32 %12 to i64
+  %14 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %0
+  %15 = addrspacecast float addrspace(3)* %14 to float*
+  %16 = load float, float* %15, align 4
+  %17 = fadd float %11, %16
+  %18 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %7
+  %19 = addrspacecast float addrspace(3)* %18 to float*
+  %20 = load float, float* %19, align 4
+  %21 = fadd float %17, %20
+  store float %21, float* %output, align 4
+  ret void
+}
+; PTX-LABEL: sum_of_array(
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array(
+; TODO: GVN is unable to preserve the "inbounds" keyword on the first GEP. Need
+; some infrastructure changes to enable such optimizations.
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
+
+; @sum_of_array2 is very similar to @sum_of_array. The only difference is in
+; the order of "sext" and "add" when computing the array indices. @sum_of_array
+; computes add before sext, e.g., array[sext(x + 1)][sext(y + 1)], while
+; @sum_of_array2 computes sext before add,
+; e.g., array[sext(x) + 1][sext(y) + 1]. SeparateConstOffsetFromGEP should be
+; able to extract constant offsets from both forms.
+define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
+  %0 = sext i32 %y to i64
+  %1 = sext i32 %x to i64
+  %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+  %3 = addrspacecast float addrspace(3)* %2 to float*
+  %4 = load float, float* %3, align 4
+  %5 = fadd float %4, 0.000000e+00
+  %6 = add i64 %0, 1
+  %7 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %6
+  %8 = addrspacecast float addrspace(3)* %7 to float*
+  %9 = load float, float* %8, align 4
+  %10 = fadd float %5, %9
+  %11 = add i64 %1, 1
+  %12 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %0
+  %13 = addrspacecast float addrspace(3)* %12 to float*
+  %14 = load float, float* %13, align 4
+  %15 = fadd float %10, %14
+  %16 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %6
+  %17 = addrspacecast float addrspace(3)* %16 to float*
+  %18 = load float, float* %17, align 4
+  %19 = fadd float %15, %18
+  store float %19, float* %output, align 4
+  ret void
+}
+; PTX-LABEL: sum_of_array2(
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array2(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
+
+
+; This function loads
+;   array[zext(x)][zext(y)]
+;   array[zext(x)][zext(y +nuw 1)]
+;   array[zext(x +nuw 1)][zext(y)]
+;   array[zext(x +nuw 1)][zext(y +nuw 1)].
+;
+; This function is similar to @sum_of_array, but it
+; 1) extends array indices using zext instead of sext;
+; 2) annotates the addition with "nuw"; otherwise, zext(x + 1) => zext(x) + 1
+;    may be invalid.
+define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
+  %0 = zext i32 %y to i64
+  %1 = zext i32 %x to i64
+  %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+  %3 = addrspacecast float addrspace(3)* %2 to float*
+  %4 = load float, float* %3, align 4
+  %5 = fadd float %4, 0.000000e+00
+  %6 = add nuw i32 %y, 1
+  %7 = zext i32 %6 to i64
+  %8 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %7
+  %9 = addrspacecast float addrspace(3)* %8 to float*
+  %10 = load float, float* %9, align 4
+  %11 = fadd float %5, %10
+  %12 = add nuw i32 %x, 1
+  %13 = zext i32 %12 to i64
+  %14 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %0
+  %15 = addrspacecast float addrspace(3)* %14 to float*
+  %16 = load float, float* %15, align 4
+  %17 = fadd float %11, %16
+  %18 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %7
+  %19 = addrspacecast float addrspace(3)* %18 to float*
+  %20 = load float, float* %19, align 4
+  %21 = fadd float %17, %20
+  store float %21, float* %output, align 4
+  ret void
+}
+; PTX-LABEL: sum_of_array3(
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array3(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
+
+
+; This function loads
+;   array[zext(x)][zext(y)]
+;   array[zext(x)][zext(y) + 1]
+;   array[zext(x) + 1][zext(y)]
+;   array[zext(x) + 1][zext(y) + 1].
+;
+; We expect the generated code to reuse the computation of
+; &array[zext(x)][zext(y)]. See the expected IR and PTX for details.
+define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
+.preheader:
+  %0 = zext i32 %y to i64
+  %1 = zext i32 %x to i64
+  %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
+  %3 = addrspacecast float addrspace(3)* %2 to float*
+  %4 = load float, float* %3, align 4
+  %5 = fadd float %4, 0.000000e+00
+  %6 = add i64 %0, 1
+  %7 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %6
+  %8 = addrspacecast float addrspace(3)* %7 to float*
+  %9 = load float, float* %8, align 4
+  %10 = fadd float %5, %9
+  %11 = add i64 %1, 1
+  %12 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %0
+  %13 = addrspacecast float addrspace(3)* %12 to float*
+  %14 = load float, float* %13, align 4
+  %15 = fadd float %10, %14
+  %16 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %6
+  %17 = addrspacecast float addrspace(3)* %16 to float*
+  %18 = load float, float* %17, align 4
+  %19 = fadd float %15, %18
+  store float %19, float* %output, align 4
+  ret void
+}
+; PTX-LABEL: sum_of_array4(
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+
+; IR-LABEL: @sum_of_array4(
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
+
+
+; The source code is:
+;   p0 = &input[sext(x + y)];
+;   p1 = &input[sext(x + (y + 5))];
+;
+; Without reuniting extensions, SeparateConstOffsetFromGEP would emit
+;   p0 = &input[sext(x + y)];
+;   t1 = &input[sext(x) + sext(y)];
+;   p1 = &t1[5];
+;
+; With reuniting extensions, it merges p0 and t1 and thus emits
+;   p0 = &input[sext(x + y)];
+;   p1 = &p0[5];
+define void @reunion(i32 %x, i32 %y, float* %input) {
+; IR-LABEL: @reunion(
+; PTX-LABEL: reunion(
+entry:
+  %xy = add nsw i32 %x, %y
+  %0 = sext i32 %xy to i64
+  %p0 = getelementptr inbounds float, float* %input, i64 %0
+  %v0 = load float, float* %p0, align 4
+; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0:%rd[0-9]+]]{{\]}}
+  call void @use(float %v0)
+
+  %y5 = add nsw i32 %y, 5
+  %xy5 = add nsw i32 %x, %y5
+  %1 = sext i32 %xy5 to i64
+  %p1 = getelementptr inbounds float, float* %input, i64 %1
+; IR: getelementptr inbounds float, float* %p0, i64 5
+  %v1 = load float, float* %p1, align 4
+; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0]]+20{{\]}}
+  call void @use(float %v1)
+
+  ret void
+}
+
+declare void @use(float)

Added: llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll (added)
+++ llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,300 @@
+; RUN: opt < %s -mtriple=nvptx64-nvidia-cuda -separate-const-offset-from-gep \
+; RUN:       -reassociate-geps-verify-no-dead-code -S | FileCheck %s
+
+; Several unit tests for -separate-const-offset-from-gep. The transformation
+; heavily relies on TargetTransformInfo, so we put these tests under
+; target-specific folders.
+
+%struct.S = type { float, double }
+
+@struct_array = global [1024 x %struct.S] zeroinitializer, align 16
+@float_2d_array = global [32 x [32 x float]] zeroinitializer, align 4
+
+; We should not extract any struct field indices, because fields in a struct
+; may have different types.
+define double* @struct(i32 %i) {
+entry:
+  %add = add nsw i32 %i, 5
+  %idxprom = sext i32 %add to i64
+  %p = getelementptr inbounds [1024 x %struct.S], [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
+  ret double* %p
+}
+; CHECK-LABEL: @struct(
+; CHECK: getelementptr [1024 x %struct.S], [1024 x %struct.S]* @struct_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1
+
+; We should be able to trace into sext(a + b) if a + b is non-negative
+; (e.g., used as an index of an inbounds GEP) and one of a and b is
+; non-negative.
+define float* @sext_add(i32 %i, i32 %j) {
+entry:
+  %0 = add i32 %i, 1
+  %1 = sext i32 %0 to i64  ; inbound sext(i + 1) = sext(i) + 1
+  %2 = add i32 %j, -2
+  ; However, inbound sext(j + -2) != sext(j) + -2, e.g., j = INT_MIN
+  %3 = sext i32 %2 to i64
+  %p = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %1, i64 %3
+  ret float* %p
+}
+; CHECK-LABEL: @sext_add(
+; CHECK-NOT: = add
+; CHECK: add i32 %j, -2
+; CHECK: sext
+; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 32
+
+; We should be able to trace into sext/zext if it can be distributed to both
+; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
+;
+; This test verifies we can transform
+;   gep base, a + sext(b +nsw 1), c + zext(d +nuw 1)
+; to
+;   gep base, a + sext(b), c + zext(d); gep ..., 1 * 32 + 1
+define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
+  %b1 = add nsw i32 %b, 1
+  %b2 = sext i32 %b1 to i64
+  %i = add i64 %a, %b2       ; i = a + sext(b +nsw 1)
+  %d1 = add nuw i32 %d, 1
+  %d2 = zext i32 %d1 to i64
+  %j = add i64 %c, %d2       ; j = c + zext(d +nuw 1)
+  %p = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j
+  ret float* %p
+}
+; CHECK-LABEL: @ext_add_no_overflow(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 33
+
+; Verifies we handle nested sext/zext correctly.
+define void @sext_zext(i32 %a, i32 %b, float** %out1, float** %out2) {
+entry:
+  %0 = add nsw nuw i32 %a, 1
+  %1 = sext i32 %0 to i48
+  %2 = zext i48 %1 to i64    ; zext(sext(a +nsw nuw 1)) = zext(sext(a)) + 1
+  %3 = add nsw i32 %b, 2
+  %4 = sext i32 %3 to i48
+  %5 = zext i48 %4 to i64    ; zext(sext(b +nsw 2)) != zext(sext(b)) + 2
+  %p1 = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %2, i64 %5
+  store float* %p1, float** %out1
+  %6 = add nuw i32 %a, 3
+  %7 = zext i32 %6 to i48
+  %8 = sext i48 %7 to i64 ; sext(zext(a +nuw 3)) = zext(a +nuw 3) = zext(a) + 3
+  %9 = add nsw i32 %b, 4
+  %10 = zext i32 %9 to i48
+  %11 = sext i48 %10 to i64  ; sext(zext(b +nsw 4)) != zext(b) + 4
+  %p2 = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %8, i64 %11
+  store float* %p2, float** %out2
+  ret void
+}
+; CHECK-LABEL: @sext_zext(
+; CHECK: [[BASE_PTR_1:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float, float* [[BASE_PTR_1]], i64 32
+; CHECK: [[BASE_PTR_2:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float, float* [[BASE_PTR_2]], i64 96
+
+; Similar to @ext_add_no_overflow, we should be able to trace into s/zext if
+; its operand is an OR and the two operands of the OR have no common bits.
+define float* @sext_or(i64 %a, i32 %b) {
+entry:
+  %b1 = shl i32 %b, 2
+  %b2 = or i32 %b1, 1 ; (b << 2) and 1 have no common bits
+  %b3 = or i32 %b1, 4 ; (b << 2) and 4 may have common bits
+  %b2.ext = zext i32 %b2 to i64
+  %b3.ext = sext i32 %b3 to i64
+  %i = add i64 %a, %b2.ext
+  %j = add i64 %a, %b3.ext
+  %p = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %j
+  ret float* %p
+}
+; CHECK-LABEL: @sext_or(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 32
+
+; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
+; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
+; affected.
+define float* @expr(i64 %a, i64 %b, i64* %out) {
+entry:
+  %b5 = add i64 %b, 5
+  %i = add i64 %b5, %a
+  %p = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 0
+  store i64 %b5, i64* %out
+  ret float* %p
+}
+; CHECK-LABEL: @expr(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 160
+; CHECK: store i64 %b5, i64* %out
+
+; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8
+define float* @sext_expr(i32 %a, i32 %b, i32 %c, i64 %d) {
+entry:
+  %0 = add nsw i32 %c, 8
+  %1 = add nsw i32 %b, %0
+  %2 = add nsw i32 %a, %1
+  %3 = sext i32 %2 to i64
+  %i = add i64 %d, %3
+  %p = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %i
+  ret float* %p
+}
+; CHECK-LABEL: @sext_expr(
+; CHECK: sext i32
+; CHECK: sext i32
+; CHECK: sext i32
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 8
+
+; Verifies we handle "sub" correctly.
+define float* @sub(i64 %i, i64 %j) {
+  %i2 = sub i64 %i, 5 ; i - 5
+  %j2 = sub i64 5, %j ; 5 - j
+  %p = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i2, i64 %j2
+  ret float* %p
+}
+; CHECK-LABEL: @sub(
+; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 -155
+
+%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
+
+; Verifies we can emit correct uglygep if the address is not naturally aligned.
+define i64* @packed_struct(i32 %i, i32 %j) {
+entry:
+  %s = alloca [1024 x %struct.Packed], align 16
+  %add = add nsw i32 %j, 3
+  %idxprom = sext i32 %add to i64
+  %add1 = add nsw i32 %i, 1
+  %idxprom2 = sext i32 %add1 to i64
+  %arrayidx3 = getelementptr inbounds [1024 x %struct.Packed], [1024 x %struct.Packed]* %s, i64 0, i64 %idxprom2, i32 1, i64 %idxprom
+  ret i64* %arrayidx3
+}
+; CHECK-LABEL: @packed_struct(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed], [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
+; CHECK: %uglygep = getelementptr inbounds i8, i8* [[CASTED_PTR]], i64 100
+; CHECK: bitcast i8* %uglygep to i64*
+
+; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))",
+; because "zext(b + 8) != zext(b) + 8"
+define float* @zext_expr(i32 %a, i32 %b) {
+entry:
+  %0 = add i32 %b, 8         ; plain add (no nuw), so the 8 must stay inside the zext
+  %1 = add nuw i32 %a, %0
+  %i = zext i32 %1 to i64
+  %p = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %i
+  ret float* %p
+}
+; CHECK-LABEL: @zext_expr(
+; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %i
+
+; Per http://llvm.org/docs/LangRef.html#id181, the indices of an off-bound gep
+; should be considered sign-extended to the pointer size. Therefore,
+;   gep base, (add i32 a, b) != gep (gep base, i32 a), i32 b
+; because
+;   sext(a + b) != sext(a) + sext(b)
+;
+; This test verifies we do not illegitimately extract the 8 from
+;   gep base, (i32 a + 8)
+define float* @i32_add(i32 %a) {
+entry:
+  %i = add i32 %a, 8
+  %p = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i32 %i
+  ret float* %p
+}
+; CHECK-LABEL: @i32_add(
+; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %{{[a-zA-Z0-9]+}}
+; CHECK-NOT: getelementptr
+
+; Verifies that we compute the correct constant offset when the index is
+; sign-extended and then zero-extended. The old version of our code failed to
+; handle this case because it simply computed the constant offset as the
+; sign-extended value of the constant part of the GEP index.
+define float* @apint(i1 %a) {
+entry:
+  %0 = add nsw nuw i1 %a, 1
+  %1 = sext i1 %0 to i4
+  %2 = zext i4 %1 to i64         ; zext (sext i1 1 to i4) to i64 = 15
+  %p = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %2
+  ret float* %p
+}
+; CHECK-LABEL: @apint(
+; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %{{[a-zA-Z0-9]+}}
+; CHECK: getelementptr float, float* [[BASE_PTR]], i64 15
+
+; Do not trace into binary operators other than ADD, SUB, and OR.
+define float* @and(i64 %a) {
+entry:
+  %0 = shl i64 %a, 2
+  %1 = and i64 %0, 1
+  %p = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 0, i64 %1
+  ret float* %p
+}
+; CHECK-LABEL: @and(
+; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array
+; CHECK-NOT: getelementptr
+
+; The code that rebuilds an OR expression used to be buggy, and failed on this
+; test.
+define float* @shl_add_or(i64 %a, float* %ptr) {
+; CHECK-LABEL: @shl_add_or(
+entry:
+  %shl = shl i64 %a, 2
+  %add = add i64 %shl, 12
+  %or = or i64 %add, 1
+; CHECK: [[OR:%or[0-9]*]] = add i64 %shl, 1
+  ; ((a << 2) + 12) and 1 have no common bits. Therefore,
+  ; SeparateConstOffsetFromGEP is able to extract the 12.
+  ; TODO(jingyue): We could reassociate the expression to combine 12 and 1.
+  %p = getelementptr float, float* %ptr, i64 %or
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr float, float* %ptr, i64 [[OR]]
+; CHECK: getelementptr float, float* [[PTR]], i64 12
+  ret float* %p
+; CHECK-NEXT: ret
+}
+
+; The source code used to be buggy in checking
+; (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0)
+; where AccumulativeByteOffset is signed but ElementTypeSizeOfGEP is unsigned.
+; The compiler would promote AccumulativeByteOffset to unsigned, causing
+; unexpected results. For example, while -64 % (int64_t)24 != 0,
+; -64 % (uint64_t)24 == 0.
+%struct3 = type { i64, i32 }
+%struct2 = type { %struct3, i32 }
+%struct1 = type { i64, %struct2 }
+%struct0 = type { i32, i32, i64*, [100 x %struct1] }
+define %struct2* @sign_mod_unsign(%struct0* %ptr, i64 %idx) {
+; CHECK-LABEL: @sign_mod_unsign(
+entry:
+  %arrayidx = add nsw i64 %idx, -2
+; CHECK-NOT: add
+  %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
+; CHECK: getelementptr inbounds %struct2, %struct2* [[PTR]], i64 -3
+  ret %struct2* %ptr2
+; CHECK-NEXT: ret
+}
+
+; Check that we can see through explicit trunc() instruction.
+define %struct2* @trunk_explicit(%struct0* %ptr, i64 %idx) {
+; CHECK-LABEL: @trunk_explicit(
+entry:
+  %idx0 = trunc i64 1 to i32  ; constant index hidden behind an explicit trunc
+  %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i32 %idx0, i32 3, i64 %idx, i32 1
+; CHECK-NOT: trunc
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
+; CHECK: getelementptr inbounds %struct2, %struct2* [[PTR]], i64 151
+  ret %struct2* %ptr2
+; CHECK-NEXT: ret
+}
+
+; Check that we can deal with trunc inserted by
+; canonicalizeArrayIndicesToPointerSize() if size of an index is larger than
+; that of the pointer.
+define %struct2* @trunk_long_idx(%struct0* %ptr, i64 %idx) {
+; CHECK-LABEL: @trunk_long_idx(
+entry:
+  %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i65 1, i32 3, i64 %idx, i32 1  ; i65 index is wider than i64
+; CHECK-NOT: trunc
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
+; CHECK: getelementptr inbounds %struct2, %struct2* [[PTR]], i64 151
+  ret %struct2* %ptr2
+; CHECK-NEXT: ret
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2006-06-13-SingleEntryPHI.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+	%struct.BLEND_MAP = type { i16, i16, i16, i32, %struct.BLEND_MAP_ENTRY* }
+	%struct.BLEND_MAP_ENTRY = type { float, i8, { [5 x float], [4 x i8] } }
+	%struct.TPATTERN = type { i16, i16, i16, i32, float, float, float, %struct.WARP*, %struct.TPATTERN*, %struct.BLEND_MAP*, { %struct.anon, [4 x i8] } }
+	%struct.TURB = type { i16, %struct.WARP*, [3 x double], i32, float, float }
+	%struct.WARP = type { i16, %struct.WARP* }
+	%struct.anon = type { float, [3 x double] }
+
+define void @Parse_Pattern() {
+entry:
+	br label %bb1096.outer20
+bb671:		; preds = %cond_true1099
+	br label %bb1096.outer23
+bb1096.outer20.loopexit:		; preds = %cond_true1099
+	%Local_Turb.0.ph24.lcssa = phi %struct.TURB* [ %Local_Turb.0.ph24, %cond_true1099 ]		; <%struct.TURB*> [#uses=1]
+	br label %bb1096.outer20
+bb1096.outer20:		; preds = %bb1096.outer20.loopexit, %entry
+	%Local_Turb.0.ph22 = phi %struct.TURB* [ undef, %entry ], [ %Local_Turb.0.ph24.lcssa, %bb1096.outer20.loopexit ]		; <%struct.TURB*> [#uses=1]
+	%tmp1098 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br label %bb1096.outer23
+bb1096.outer23:		; preds = %bb1096.outer20, %bb671
+	%Local_Turb.0.ph24 = phi %struct.TURB* [ %Local_Turb.0.ph22, %bb1096.outer20 ], [ null, %bb671 ]		; <%struct.TURB*> [#uses=2]
+	br label %bb1096
+bb1096:		; preds = %cond_true1099, %bb1096.outer23
+	br i1 %tmp1098, label %cond_true1099, label %bb1102
+cond_true1099:		; preds = %bb1096
+	switch i32 0, label %bb1096.outer20.loopexit [
+		 i32 161, label %bb671
+		 i32 359, label %bb1096
+	]
+bb1102:		; preds = %bb1096
+	%Local_Turb.0.ph24.lcssa1 = phi %struct.TURB* [ %Local_Turb.0.ph24, %bb1096 ]		; <%struct.TURB*> [#uses=0]
+	ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2006-06-27-DeadSwitchCase.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+define void @init_caller_save() {
+entry:
+  br label %cond_true78
+
+cond_true78:    ; preds = %bb75, %entry
+  %i.0.0 = phi i32 [ 0, %entry ], [ %tmp74.0, %bb75 ]    ; <i32> [#uses=2]
+  br label %bb54
+
+bb54:    ; preds = %cond_true78, %bb31
+  br i1 false, label %bb75, label %cond_true64
+
+cond_true64:    ; preds = %bb54
+  switch i32 %i.0.0, label %cond_next20 [
+     i32 17, label %bb31
+     i32 18, label %bb31
+  ]
+
+cond_next20:    ; preds = %cond_true64
+  br label %bb31
+
+bb31:    ; preds = %cond_true64, %cond_true64, %cond_next20
+  %iftmp.29.1 = phi i32 [ 0, %cond_next20 ], [ 0, %cond_true64 ], [ 0, %cond_true64 ]    ; <i32> [#uses=0]
+  br label %bb54
+
+bb75:    ; preds = %bb54
+  %tmp74.0 = add i32 %i.0.0, 1    ; <i32> [#uses=1]
+  br label %cond_true78
+}
+

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-05-09-Unreachable.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; PR1333
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-pc-linux-gnu"
+	%struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
+	%struct.ada__tags__dispatch_table = type { [1 x i8*] }
+	%struct.quotes__T173s = type { i8, %struct.quotes__T173s__T174s, [2 x [1 x double]], [2 x i16], i64, i8 }
+	%struct.quotes__T173s__T174s = type { i8, i8, i8, i16, i16, [2 x [1 x double]] }
+
+define void @quotes__write_quote() {
+entry:
+	%tmp606.i = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br label %bb
+bb:		; preds = %cond_next73, %bb, %entry
+	br i1 false, label %bb51, label %bb
+bb51:		; preds = %cond_next73, %bb
+	br i1 %tmp606.i, label %quotes__bid_ask_depth_offset_matrices__get_price.exit, label %cond_true.i
+cond_true.i:		; preds = %bb51
+	unreachable
+quotes__bid_ask_depth_offset_matrices__get_price.exit:		; preds = %bb51
+	br i1 false, label %cond_next73, label %cond_true72
+cond_true72:		; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit
+	unreachable
+cond_next73:		; preds = %quotes__bid_ask_depth_offset_matrices__get_price.exit
+	br i1 false, label %bb, label %bb51
+}
+

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-05-09-tl.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,96 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR1333
+
+define void @pp_cxx_expression() {		; PR1333 reproducer (see file header): a single switch block that loops back to itself
+entry:
+	%tmp6 = lshr i32 0, 24		; <i32> [#uses=1]; switch operand, computed before the loop
+	br label %tailrecurse
+
+tailrecurse:		; preds = %tailrecurse, %tailrecurse, %entry
+	switch i32 %tmp6, label %bb96 [
+		 i32 24, label %bb10
+		 i32 25, label %bb10
+		 i32 28, label %bb10
+		 i32 29, label %bb48
+		 i32 31, label %bb48
+		 i32 32, label %bb48
+		 i32 33, label %bb48
+		 i32 34, label %bb48
+		 i32 36, label %bb15
+		 i32 51, label %bb89
+		 i32 52, label %bb89
+		 i32 54, label %bb83
+		 i32 57, label %bb59
+		 i32 63, label %bb80
+		 i32 64, label %bb80
+		 i32 68, label %bb80
+		 i32 169, label %bb75
+		 i32 170, label %bb19
+		 i32 171, label %bb63
+		 i32 172, label %bb63
+		 i32 173, label %bb67
+		 i32 174, label %bb67
+		 i32 175, label %bb19
+		 i32 176, label %bb75
+		 i32 178, label %bb59
+		 i32 179, label %bb89
+		 i32 180, label %bb59
+		 i32 182, label %bb48
+		 i32 183, label %bb48
+		 i32 184, label %bb48
+		 i32 185, label %bb48
+		 i32 186, label %bb48
+		 i32 195, label %bb48
+		 i32 196, label %bb59
+		 i32 197, label %bb89
+		 i32 198, label %bb70
+		 i32 199, label %bb59
+		 i32 200, label %bb59
+		 i32 201, label %bb59
+		 i32 202, label %bb59
+		 i32 203, label %bb75
+		 i32 204, label %bb59
+		 i32 205, label %tailrecurse		; back edge: the switch block is its own successor
+		 i32 210, label %tailrecurse		; second back edge to the same block
+	]
+
+bb10:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb15:		; preds = %tailrecurse
+	ret void
+
+bb19:		; preds = %tailrecurse, %tailrecurse
+	ret void
+
+bb48:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb59:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb63:		; preds = %tailrecurse, %tailrecurse
+	ret void
+
+bb67:		; preds = %tailrecurse, %tailrecurse
+	ret void
+
+bb70:		; preds = %tailrecurse
+	ret void
+
+bb75:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb80:		; preds = %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb83:		; preds = %tailrecurse
+	ret void
+
+bb89:		; preds = %tailrecurse, %tailrecurse, %tailrecurse, %tailrecurse
+	ret void
+
+bb96:		; preds = %tailrecurse
+	ret void		; default switch destination
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-12-ExitDomInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt < %s -simple-loop-unswitch -instcombine -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -instcombine -disable-output
+
+@str3 = external constant [3 x i8]		; <[3 x i8]*> [#uses=1]
+
+define i32 @stringSearch_Clib(i32 %count) {		; returns the value merged by the phi in %bb44; %count is only compared against 0 in entry
+entry:
+	%ttmp25 = icmp sgt i32 %count, 0		; <i1> [#uses=1]
+	br i1 %ttmp25, label %bb36.preheader, label %bb44		; the loop is skipped entirely when %count <= 0
+
+bb36.preheader:		; preds = %entry
+	%ttmp33 = icmp slt i32 0, 250		; <i1> [#uses=1]; computed before the loop, tested in loop header %bb36.outer
+	br label %bb36.outer
+
+bb36.outer:		; preds = %bb41, %bb36.preheader
+	br i1 %ttmp33, label %bb.nph, label %bb41
+
+bb.nph:		; preds = %bb36.outer
+	%ttmp8 = icmp eq i8* null, null		; <i1> [#uses=1]
+	%ttmp6 = icmp eq i8* null, null		; <i1> [#uses=1]
+	%tmp31 = call i32 @strcspn( i8* null, i8* getelementptr ([3 x i8], [3 x i8]* @str3, i64 0, i64 0) )		; <i32> [#uses=1]
+	br i1 %ttmp8, label %cond_next, label %cond_true
+
+cond_true:		; preds = %bb.nph
+	ret i32 0		; return from inside the loop body
+
+cond_next:		; preds = %bb.nph
+	br i1 %ttmp6, label %cond_next28, label %cond_true20
+
+cond_true20:		; preds = %cond_next
+	ret i32 0		; second return from inside the loop body
+
+cond_next28:		; preds = %cond_next
+	%tmp33 = add i32 %tmp31, 0		; <i32> [#uses=1]
+	br label %bb41
+
+bb41:		; preds = %cond_next28, %bb36.outer
+	%c.2.lcssa = phi i32 [ 0, %bb36.outer ], [ %tmp33, %cond_next28 ]		; <i32> [#uses=1]; 0 when the header bypassed the body, else the strcspn-derived value
+	br i1 false, label %bb36.outer, label %bb44		; true edge is the loop back edge
+
+bb44:		; preds = %bb41, %entry
+	%c.01.1 = phi i32 [ 0, %entry ], [ %c.2.lcssa, %bb41 ]		; <i32> [#uses=1]
+	ret i32 %c.01.1
+}
+
+declare i32 @strcspn(i8*, i8*)

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-13-DomInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+define i32 @main(i32 %argc, i8** %argv) {		; neither argument is used in the body; every return yields 0
+entry:
+	%tmp1785365 = icmp ult i32 0, 100		; <i1> [#uses=1]; computed before the loop, decides the back edge in %cond_true
+	br label %bb
+
+bb:		; preds = %cond_true, %entry
+	br i1 false, label %cond_true, label %cond_next
+
+cond_true:		; preds = %bb
+	br i1 %tmp1785365, label %bb, label %bb1788		; true edge loops back to %bb
+
+cond_next:		; preds = %bb
+	%iftmp.1.0 = select i1 false, i32 0, i32 0		; <i32> [#uses=1]
+	br i1 false, label %cond_true47, label %cond_next74
+
+cond_true47:		; preds = %cond_next
+	%tmp53 = urem i32 %iftmp.1.0, 0		; <i32> [#uses=0]; remainder by constant 0, result never used
+	ret i32 0
+
+cond_next74:		; preds = %cond_next
+	ret i32 0
+
+bb1788:		; preds = %cond_true
+	ret i32 0
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-07-18-DomInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR1559
+
+target triple = "i686-pc-linux-gnu"
+	%struct.re_pattern_buffer = type { i8*, i32, i32, i32, i8*, i8*, i32, i8 }
+
+define fastcc i32 @byte_regex_compile(i8* %pattern, i32 %size, i32 %syntax, %struct.re_pattern_buffer* %bufp) {		; PR1559 reproducer (see file header); no argument is used and every return yields 0
+entry:
+        br i1 false, label %bb147, label %cond_next123
+
+cond_next123:           ; preds = %entry
+        ret i32 0
+
+bb147:          ; preds = %entry
+        switch i32 0, label %normal_char [
+                 i32 91, label %bb1734
+                 i32 92, label %bb5700
+        ]
+
+bb1734:         ; preds = %bb147
+        br label %bb1855.outer.outer
+
+cond_true1831:          ; preds = %bb1855.outer
+        br i1 %tmp1837, label %cond_next1844, label %cond_true1840		; %tmp1837 is defined further down, in %bb1855.outer, this block's only predecessor
+
+cond_true1840:          ; preds = %cond_true1831
+        ret i32 0
+
+cond_next1844:          ; preds = %cond_true1831
+        br i1 false, label %bb1855.outer, label %cond_true1849
+
+cond_true1849:          ; preds = %cond_next1844
+        br label %bb1855.outer.outer		; back edge to the outermost header
+
+bb1855.outer.outer:             ; preds = %cond_true1849, %bb1734
+        %b.10.ph.ph = phi i8* [ null, %cond_true1849 ], [ null, %bb1734 ]               ; <i8*> [#uses=1]
+        br label %bb1855.outer
+
+bb1855.outer:           ; preds = %bb1855.outer.outer, %cond_next1844
+        %b.10.ph = phi i8* [ null, %cond_next1844 ], [ %b.10.ph.ph, %bb1855.outer.outer ]               ; <i8*> [#uses=1]
+        %tmp1837 = icmp eq i8* null, null               ; <i1> [#uses=2]; branched on in %cond_true1831 and %cond_next1915
+        br i1 false, label %cond_true1831, label %cond_next1915
+
+cond_next1915:          ; preds = %cond_next1961, %bb1855.outer
+        store i8* null, i8** null
+        br i1 %tmp1837, label %cond_next1929, label %cond_true1923
+
+cond_true1923:          ; preds = %cond_next1915
+        ret i32 0
+
+cond_next1929:          ; preds = %cond_next1915
+        br i1 false, label %cond_next1961, label %cond_next2009
+
+cond_next1961:          ; preds = %cond_next1929
+        %tmp1992 = getelementptr i8, i8* %b.10.ph, i32 0            ; <i8*> [#uses=0]; only use of %b.10.ph, result itself unused
+        br label %cond_next1915		; back edge of the %cond_next1915 loop
+
+cond_next2009:          ; preds = %cond_next1929
+        ret i32 0
+
+bb5700:         ; preds = %bb147
+        ret i32 0
+
+normal_char:            ; preds = %bb147
+        ret i32 0
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-08-01-Dom.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt < %s -licm -simple-loop-unswitch -disable-output 
+; PR 1589
+
+      	%struct.QBasicAtomic = type { i32 }
+
+define void @_ZNK5QDate9addMonthsEi(%struct.QBasicAtomic* sret  %agg.result, %struct.QBasicAtomic* %this, i32 %nmonths) {		; PR 1589 reproducer (see file header); %agg.result and %this are never referenced in the body
+entry:
+	br label %cond_true90		; the loop header is the last-but-one block in the file
+
+bb16:		; preds = %cond_true90
+	br i1 false, label %bb93, label %cond_true90
+
+bb45:		; preds = %cond_true90
+	br i1 false, label %bb53, label %bb58
+
+bb53:		; preds = %bb45
+	br i1 false, label %bb93, label %cond_true90
+
+bb58:		; preds = %bb45
+	store i32 0, i32* null, align 4		; the only memory access in the function
+	br i1 false, label %cond_true90, label %bb93
+
+cond_true90:		; preds = %bb58, %bb53, %bb16, %entry
+	%nmonths_addr.016.1 = phi i32 [ %nmonths, %entry ], [ 0, %bb16 ], [ 0, %bb53 ], [ %nmonths_addr.016.1, %bb58 ]		; <i32> [#uses=2]; the %bb58 incoming value is the phi itself
+	%tmp14 = icmp slt i32 %nmonths_addr.016.1, -11		; <i1> [#uses=1]
+	br i1 %tmp14, label %bb16, label %bb45
+
+bb93:		; preds = %bb58, %bb53, %bb16
+	ret void		; single exit, reached from three different loop blocks
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt < %s -simple-loop-unswitch -instcombine -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -instcombine -disable-output
+	%struct.ClassDef = type { %struct.QByteArray, %struct.QByteArray, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", i8, i8, %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", %"struct.QList<ArgumentDef>", %"struct.QMap<QByteArray,QByteArray>", i32, i32 }
+	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+	%struct.Generator = type { %struct.FILE*, %struct.ClassDef*, %"struct.QList<ArgumentDef>", %struct.QByteArray, %"struct.QList<ArgumentDef>" }
+	%struct.QBasicAtomic = type { i32 }
+	%struct.QByteArray = type { %"struct.QByteArray::Data"* }
+	%"struct.QByteArray::Data" = type { %struct.QBasicAtomic, i32, i32, i8*, [1 x i8] }
+	%"struct.QList<ArgumentDef>" = type { %"struct.QList<ArgumentDef>::._19" }
+	%"struct.QList<ArgumentDef>::._19" = type { %struct.QListData }
+	%struct.QListData = type { %"struct.QListData::Data"* }
+	%"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] }
+	%"struct.QMap<QByteArray,QByteArray>" = type { %"struct.QMap<QByteArray,QByteArray>::._56" }
+	%"struct.QMap<QByteArray,QByteArray>::._56" = type { %struct.QMapData* }
+	%struct.QMapData = type { %struct.QMapData*, [12 x %struct.QMapData*], %struct.QBasicAtomic, i32, i32, i32, i8 }
+	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+@.str9 = external constant [1 x i8]		; <[1 x i8]*> [#uses=1]
+
+declare i32 @strcmp(i8*, i8*)
+
+define i32 @_ZN9Generator6strregEPKc(%struct.Generator* %this, i8* %s) {		; %this is unused; returns 0 from %bb55 or the loop phi %idx.0 from %bb250
+entry:
+	%s_addr.0 = select i1 false, i8* getelementptr ([1 x i8], [1 x i8]* @.str9, i32 0, i32 0), i8* %s		; <i8*> [#uses=2]
+	%tmp122 = icmp eq i8* %s_addr.0, null		; <i1> [#uses=1]; computed before the loop, branched on in %bb88 inside it
+	br label %bb184
+
+bb55:		; preds = %bb184
+	ret i32 0
+
+bb88:		; preds = %bb184
+	br i1 %tmp122, label %bb154, label %bb128
+
+bb128:		; preds = %bb88
+	%tmp138 = call i32 @strcmp( i8* null, i8* %s_addr.0 )		; <i32> [#uses=1]
+	%iftmp.37.0.in4 = icmp eq i32 %tmp138, 0		; <i1> [#uses=1]
+	br i1 %iftmp.37.0.in4, label %bb250, label %bb166
+
+bb154:		; preds = %bb88
+	br i1 false, label %bb250, label %bb166
+
+bb166:		; preds = %bb154, %bb128
+	%tmp175 = add i32 %idx.0, 1		; <i32> [#uses=1]; %idx.0 and %i33.0 are the phis of loop header %bb184 below
+	%tmp177 = add i32 %tmp175, 0		; <i32> [#uses=1]
+	%tmp181 = add i32 %tmp177, 0		; <i32> [#uses=1]
+	%tmp183 = add i32 %i33.0, 1		; <i32> [#uses=1]
+	br label %bb184		; loop back edge
+
+bb184:		; preds = %bb166, %entry
+	%i33.0 = phi i32 [ 0, %entry ], [ %tmp183, %bb166 ]		; <i32> [#uses=2]
+	%idx.0 = phi i32 [ 0, %entry ], [ %tmp181, %bb166 ]		; <i32> [#uses=2]
+	%tmp49 = icmp slt i32 %i33.0, 0		; <i1> [#uses=1]
+	br i1 %tmp49, label %bb88, label %bb55
+
+bb250:		; preds = %bb154, %bb128
+	ret i32 %idx.0		; direct use of a loop-header phi in an exit block with two in-loop predecessors
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2007-10-04-DomFrontier.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -licm -loop-unroll -disable-output
+
+@resonant = external global i32		; <i32*> [#uses=2]
+
+define void @weightadj() {		; no arguments or result; two nested loops that store to @resonant
+entry:
+	br label %bb
+
+bb:		; preds = %bb158, %entry
+	store i32 0, i32* @resonant, align 4		; store in the outer loop header
+	br i1 false, label %g.exit, label %bb158
+
+g.exit:		; preds = %bb68, %bb
+	br i1 false, label %bb68, label %cond_true		; header of the inner loop (%g.exit <-> %bb68)
+
+cond_true:		; preds = %g.exit
+	store i32 1, i32* @resonant, align 4		; store inside the inner loop
+	br label %bb68
+
+bb68:		; preds = %cond_true, %g.exit
+	%tmp71 = icmp slt i32 0, 0		; <i1> [#uses=1]
+	br i1 %tmp71, label %g.exit, label %bb158		; true edge is the inner back edge
+
+bb158:		; preds = %bb68, %bb
+	br i1 false, label %bb, label %return		; true edge is the outer back edge
+
+return:		; preds = %bb158
+	ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2008-06-02-DomInfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt < %s -simple-loop-unswitch -instcombine -gvn -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -instcombine -gvn -disable-output
+; PR2372
+target triple = "i386-pc-linux-gnu"
+
+define i32 @func_3(i16 signext  %p_5, i16 signext  %p_6) nounwind  {		; PR2372 reproducer (see file header); the returned value is undef
+entry:
+	%tmp3 = icmp eq i16 %p_5, 0		; <i1> [#uses=1]; computed before the loop, branched on in %bb
+	%tmp1314 = sext i16 %p_6 to i32		; <i32> [#uses=1]
+	%tmp28 = icmp ugt i32 %tmp1314, 3		; <i1> [#uses=1]
+	%bothcond = or i1 %tmp28, false		; <i1> [#uses=1]; computed before the loop, branched on in %bb5
+	br label %bb
+bb:		; preds = %bb54, %entry
+	br i1 %tmp3, label %bb54, label %bb5
+bb5:		; preds = %bb
+	br i1 %bothcond, label %bb54, label %bb31
+bb31:		; preds = %bb5
+	br label %bb54
+bb54:		; preds = %bb31, %bb5, %bb
+	br i1 false, label %bb64, label %bb		; false edge is the loop back edge
+bb64:		; preds = %bb54
+	%tmp6566 = sext i16 %p_6 to i32		; <i32> [#uses=1]; same sext of %p_6 as %tmp1314 in entry
+	%tmp68 = tail call i32 (...) @func_18( i32 1, i32 %tmp6566, i32 1 ) nounwind 		; <i32> [#uses=0]
+	ret i32 undef
+}
+
+declare i32 @func_18(...)

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2008-06-17-DomFrontier.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt < %s -licm -simple-loop-unswitch -disable-output
+@g_56 = external global i16		; <i16*> [#uses=2]
+
+define i32 @func_67(i32 %p_68, i8 signext  %p_69, i8 signext  %p_71) nounwind  {		; none of the three arguments is used; the only return is `ret i32 0` in %bb29
+entry:
+	br label %bb
+bb:		; preds = %bb44, %entry
+	br label %bb3
+bb3:		; preds = %bb36, %bb
+	%bothcond = or i1 false, false		; <i1> [#uses=1]
+	br i1 %bothcond, label %bb29, label %bb19
+bb19:		; preds = %bb3
+	br i1 false, label %bb36, label %bb29
+bb29:		; preds = %bb19, %bb3
+	ret i32 0
+bb36:		; preds = %bb19
+	store i16 0, i16* @g_56, align 2
+	br i1 false, label %bb44, label %bb3		; false edge loops back to %bb3
+bb44:		; preds = %bb44, %bb36
+	%tmp46 = load i16, i16* @g_56, align 2		; <i16> [#uses=0]; loaded value is never used
+	br i1 false, label %bb, label %bb44		; false edge is a self-loop; true edge returns to the outer header %bb
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2010-11-18-LCSSA.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -simple-loop-unswitch
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa
+; PR8622
+@g_38 = external global i32, align 4
+
+define void @func_67(i32 %p_68.coerce) nounwind {  ; PR8622 reproducer (see file header)
+entry:
+  br i1 true, label %for.end12, label %bb.nph  ; constant-true condition: the written path goes straight to the exit
+
+bb.nph:                                           ; preds = %entry
+  %g_38.promoted = load i32, i32* @g_38  ; loaded value has no uses in this function
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond, %bb.nph
+  %tobool.i = icmp eq i32 %p_68.coerce, 1
+  %xor4.i = xor i32 %p_68.coerce, 1
+  %call1 = select i1 %tobool.i, i32 0, i32 %xor4.i  ; defined inside the loop, stored after it in %for.cond.for.end12_crit_edge
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body
+  br i1 true, label %for.cond.for.end12_crit_edge, label %for.body  ; false edge is the loop back edge
+
+for.cond.for.end12_crit_edge:                     ; preds = %for.cond
+  store i32 %call1, i32* @g_38  ; use of the in-loop value %call1 outside the loop
+  br label %for.end12
+
+for.end12:                                        ; preds = %for.cond.for.end12_crit_edge, %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2011-06-02-CritSwitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt -simple-loop-unswitch -disable-output < %s
+; RUN: opt -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output < %s
+; PR10031
+
+define i32 @test(i32 %command) {  ; PR10031 reproducer (see file header); contains no `ret`, the only non-branch terminator is `unreachable`
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %if.then14, %tailrecurse, %entry
+  br i1 undef, label %if.then, label %tailrecurse  ; false edge is a self-loop
+
+if.then:                                          ; preds = %tailrecurse
+  switch i32 %command, label %sw.bb [
+    i32 2, label %land.lhs.true
+    i32 0, label %land.lhs.true
+  ]
+
+land.lhs.true:                                    ; preds = %if.then, %if.then
+  br i1 undef, label %sw.bb, label %if.then14
+
+if.then14:                                        ; preds = %land.lhs.true
+  switch i32 %command, label %tailrecurse [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %if.then14, %if.then14, %land.lhs.true, %if.then
+  unreachable  ; reached from both switches on the argument %command and from %land.lhs.true
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,64 @@
+; RUN: opt < %s -sroa -simple-loop-unswitch -disable-output
+; RUN: opt < %s -sroa -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR11016
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.2"
+
+%class.MyContainer.1.3.19.29 = type { [6 x %class.MyMemVarClass.0.2.18.28*] }
+%class.MyMemVarClass.0.2.18.28 = type { i32 }
+
+define void @_ZN11MyContainer1fEi(%class.MyContainer.1.3.19.29* %this, i32 %doit) uwtable ssp align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {  ; PR11016 reproducer (see file header): counted loop whose body contains an invoke
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %inc1 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]  ; loop counter, starts at 0
+  %conv = sext i32 %inc1 to i64
+  %cmp = icmp ult i64 %conv, 6  ; loop continues while the counter is below 6
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tobool = icmp ne i32 %doit, 0  ; depends only on the argument %doit, yet is recomputed on every iteration
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %idxprom = sext i32 %inc1 to i64
+  %array_ = getelementptr inbounds %class.MyContainer.1.3.19.29, %class.MyContainer.1.3.19.29* %this, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [6 x %class.MyMemVarClass.0.2.18.28*], [6 x %class.MyMemVarClass.0.2.18.28*]* %array_, i32 0, i64 %idxprom
+  %tmp4 = load %class.MyMemVarClass.0.2.18.28*, %class.MyMemVarClass.0.2.18.28** %arrayidx, align 8  ; element %inc1 of the array held in field 0 of %this
+  %isnull = icmp eq %class.MyMemVarClass.0.2.18.28* %tmp4, null
+  br i1 %isnull, label %for.inc, label %delete.notnull
+
+delete.notnull:                                   ; preds = %if.then
+  invoke void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28* %tmp4)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %delete.notnull
+  %0 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8*
+  call void @_ZdlPv(i8* %0) nounwind
+  br label %for.inc
+
+lpad:                                             ; preds = %delete.notnull
+  %1 = landingpad { i8*, i32 }
+          cleanup
+  %2 = extractvalue { i8*, i32 } %1, 0
+  %3 = extractvalue { i8*, i32 } %1, 1
+  %4 = bitcast %class.MyMemVarClass.0.2.18.28* %tmp4 to i8*
+  call void @_ZdlPv(i8* %4) nounwind  ; same @_ZdlPv call as the normal path, made on the unwind path
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %2, 0
+  %lpad.val7 = insertvalue { i8*, i32 } %lpad.val, i32 %3, 1
+  resume { i8*, i32 } %lpad.val7  ; rebuilds the landingpad pair and resumes unwinding
+
+for.inc:                                          ; preds = %invoke.cont, %if.then, %for.body
+  %inc = add nsw i32 %inc1, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28*)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZdlPv(i8*) nounwind

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-04-02-IndirectBr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt < %s -S -simple-loop-unswitch -verify-loop-info -verify-dom-info | FileCheck %s
+; RUN: opt < %s -S -simple-loop-unswitch -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s
+; PR12343: -simple-loop-unswitch crash on indirect branch
+
+; CHECK:       %0 = icmp eq i64 undef, 0
+; CHECK-NEXT:  br i1 %0, label %"5", label %"4"
+
+; CHECK:       "5":                                              ; preds = %entry
+; CHECK-NEXT:  br label %"16"
+
+; CHECK:       "16":                                             ; preds = %"22", %"5"
+; CHECK-NEXT:  indirectbr i8* undef, [label %"22", label %"33"]
+
+; CHECK:       "22":                                             ; preds = %"16"
+; CHECK-NEXT:  br i1 %0, label %"16", label %"26"
+
+; CHECK:       "26":                                             ; preds = %"22"
+; CHECK-NEXT:  unreachable
+
+define void @foo() {  ; PR12343 input; the CHECK lines at the top of this test repeat these instructions for entry, "5", "16", "22" and "26"
+entry:
+  %0 = icmp eq i64 undef, 0  ; computed once in entry and branched on again in %"22" inside the loop
+  br i1 %0, label %"5", label %"4"
+
+"4":                                              ; preds = %entry
+  unreachable
+
+"5":                                              ; preds = %entry
+  br label %"16"
+
+"16":                                             ; preds = %"22", %"5"
+  indirectbr i8* undef, [label %"22", label %"33"]  ; loop header terminated by an indirect branch
+
+"22":                                             ; preds = %"16"
+  br i1 %0, label %"16", label %"26"  ; true edge is the loop back edge
+
+"26":                                             ; preds = %"22"
+  unreachable
+
+"33":                                             ; preds = %"16"
+  unreachable
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,97 @@
+; RUN: opt < %s -basicaa -instcombine -inline -functionattrs -licm -simple-loop-unswitch -gvn -verify
+; PR12573
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379 = type { %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 }
+%class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376 = type { %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* }
+%class.B.21.41.65.101.137.157.177.197.237.241.245.249.261.293.301.337.345.378 = type { %class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377* }
+%class.A.20.40.64.100.136.156.176.196.236.240.244.248.260.292.300.336.344.377 = type { i8 }
+
+define void @_Z23get_reconstruction_pathv() uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {  ; PR12573 reproducer (see file header): nested loops made only of invokes sharing one landing pad
+entry:
+  %c = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, align 8
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.end, %entry
+  invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %for.cond
+  invoke void @_ZN1C3endEv()
+          to label %for.cond3 unwind label %lpad
+
+for.cond3:                                        ; preds = %invoke.cont6, %invoke.cont
+  invoke void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %c)
+          to label %invoke.cont4 unwind label %lpad
+
+invoke.cont4:                                     ; preds = %for.cond3
+  invoke void @_ZN1C3endEv()
+          to label %invoke.cont6 unwind label %lpad
+
+invoke.cont6:                                     ; preds = %invoke.cont4
+  br i1 undef, label %for.cond3, label %for.end  ; true edge is the inner back edge
+
+lpad:                                             ; preds = %for.end, %invoke.cont4, %for.cond3, %invoke.cont, %for.cond
+  %0 = landingpad { i8*, i32 }
+          cleanup
+  resume { i8*, i32 } undef  ; resumes with undef; the landingpad result %0 is not used
+
+for.end:                                          ; preds = %invoke.cont6
+  invoke void @_ZN1C13_M_insert_auxER1D()
+          to label %for.cond unwind label %lpad  ; normal destination is the outer back edge, so the function only leaves via %lpad
+}
+
+define void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this) uwtable ssp align 2 {  ; invoked twice per outer iteration by @_Z23get_reconstruction_pathv; checks field 0 of %this for null
+entry:
+  %this.addr = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, align 8
+  store %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr, align 8
+  %this1 = load %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr
+  %px = getelementptr inbounds %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this1, i32 0, i32 0
+  %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8
+  %tobool = icmp ne %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376* %0, null
+  br i1 %tobool, label %cond.end, label %cond.false  ; non-null pointer returns normally
+
+cond.false:                                       ; preds = %entry
+  call void @_Z10__assert13v() noreturn  ; null pointer: calls the noreturn assert helper
+  unreachable
+
+cond.end:                                         ; preds = %entry
+  ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZN1C3endEv()
+
+define void @_ZN1C13_M_insert_auxER1D() uwtable ssp align 2 {  ; empty body; invoked from %for.end of @_Z23get_reconstruction_pathv
+entry:
+  ret void
+}
+
+define void @_ZN1DD1Ev() unnamed_addr uwtable inlinehint ssp align 2 {  ; empty body; no call site in this test file
+entry:
+  ret void
+}
+
+define void @_ZN1DD2Ev() unnamed_addr uwtable inlinehint ssp align 2 {  ; empty body; no call site in this test file
+entry:
+  ret void
+}
+
+define void @_ZN1BD1Ev() unnamed_addr uwtable ssp align 2 {  ; empty body; no call site in this test file
+entry:
+  ret void
+}
+
+define void @_ZN1BD2Ev() unnamed_addr uwtable ssp align 2 {  ; empty body; no call site in this test file
+entry:
+  ret void
+}
+
+define void @_ZN1BaSERS_() uwtable ssp align 2 {  ; body is a bare `unreachable`; no call site in this test file
+entry:
+  unreachable
+}
+
+declare void @_Z10__assert13v() noreturn

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2012-05-20-Phi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,26 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+; PR12887
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at a = common global i32 0, align 4
+ at c = common global i32 0, align 4
+ at b = common global i32 0, align 4
+
+define void @func() noreturn nounwind uwtable {
+entry:
+  %0 = load i32, i32* @a, align 4
+  %tobool = icmp eq i32 %0, 0
+  %1 = load i32, i32* @b, align 4
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %d.0 = phi i8 [ undef, %entry ], [ %conv2, %while.body ]
+  %conv = sext i8 %d.0 to i32
+  %cond = select i1 %tobool, i32 0, i32 %conv
+  %conv11 = zext i8 %d.0 to i32
+  %add = add i32 %1, %conv11
+  %conv2 = trunc i32 %add to i8
+  br label %while.body
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/2015-09-18-Addrspace.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -simple-loop-unswitch -S | FileCheck %s
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
+
+; In cases where two address spaces do not have the same size pointer, the
+; input for the addrspacecast should not be used as a substitute for itself
+; when manipulating the pointer.
+
+target datalayout = "e-m:e-p:16:16-p1:32:16-i32:16-i64:16-n8:16"
+
+define void @foo() {
+; CHECK-LABEL: @foo
+entry:
+  %arrayidx.i1 = getelementptr inbounds i16, i16* undef, i16 undef
+  %arrayidx.i = addrspacecast i16* %arrayidx.i1 to i16 addrspace(1)*
+  br i1 undef, label %for.body.i, label %bar.exit
+
+for.body.i:                                       ; preds = %for.body.i, %entry
+; When we call makeLoopInvariant (i.e. trivial LICM) on this load, it 
+; will try to find the base object to prove dereferenceability.  If we look
+; through the addrspacecast, we'll fail an assertion about bitwidths matching
+; CHECK-LABEL: for.body.i
+; CHECK:   %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+  %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+  %cmp1.i = icmp eq i16 %0, 0
+  br i1 %cmp1.i, label %bar.exit, label %for.body.i
+
+bar.exit:                                         ; preds = %for.body.i, %entry
+  ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/LIV-loop-condtion.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt < %s -simple-loop-unswitch -S 2>&1 | FileCheck %s
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S 2>&1 | FileCheck %s
+
+; This tests that trivial loop unswitch only happens when the trivial condition
+; itself is an LIV loop condition (not a partial LIV, which could occur in and/or).
+
+define i32 @test(i1 %cond1, i32 %var1) {
+; CHECK-LABEL: define i32 @test(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split, label %loop_exit.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+  %cond2 = icmp eq i32 %var3, 10
+  %cond.and = and i1 %cond1, %cond2
+  br i1 %cond.and, label %do_something, label %loop_exit
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[VAR3:.*]] = phi i32
+; CHECK-NEXT:    %[[COND2:.*]] = icmp eq i32 %[[VAR3]], 10
+; CHECK-NEXT:    %[[COND_AND:.*]] = and i1 true, %[[COND2]]
+; CHECK-NEXT:    br i1 %[[COND_AND]], label %do_something, label %loop_exit
+
+do_something:
+  %var2 = add i32 %var3, 1
+  call void @some_func() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @some_func() noreturn 

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/basictest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/basictest.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/basictest.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/basictest.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,185 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+
+define i32 @test(i32* %A, i1 %C) {
+entry:
+	br label %no_exit
+no_exit:		; preds = %no_exit.backedge, %entry
+	%i.0.0 = phi i32 [ 0, %entry ], [ %i.0.0.be, %no_exit.backedge ]		; <i32> [#uses=3]
+	%gep.upgrd.1 = zext i32 %i.0.0 to i64		; <i64> [#uses=1]
+	%tmp.7 = getelementptr i32, i32* %A, i64 %gep.upgrd.1		; <i32*> [#uses=4]
+	%tmp.13 = load i32, i32* %tmp.7		; <i32> [#uses=2]
+	%tmp.14 = add i32 %tmp.13, 1		; <i32> [#uses=1]
+	store i32 %tmp.14, i32* %tmp.7
+	br i1 %C, label %then, label %endif
+then:		; preds = %no_exit
+	%tmp.29 = load i32, i32* %tmp.7		; <i32> [#uses=1]
+	%tmp.30 = add i32 %tmp.29, 2		; <i32> [#uses=1]
+	store i32 %tmp.30, i32* %tmp.7
+	%inc9 = add i32 %i.0.0, 1		; <i32> [#uses=2]
+	%tmp.112 = icmp ult i32 %inc9, 100000		; <i1> [#uses=1]
+	br i1 %tmp.112, label %no_exit.backedge, label %return
+no_exit.backedge:		; preds = %endif, %then
+	%i.0.0.be = phi i32 [ %inc9, %then ], [ %inc, %endif ]		; <i32> [#uses=1]
+	br label %no_exit
+endif:		; preds = %no_exit
+	%inc = add i32 %i.0.0, 1		; <i32> [#uses=2]
+	%tmp.1 = icmp ult i32 %inc, 100000		; <i1> [#uses=1]
+	br i1 %tmp.1, label %no_exit.backedge, label %return
+return:		; preds = %endif, %then
+	ret i32 %tmp.13
+}
+
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the noduplicate call.
+
+; CHECK-LABEL: @test2(
+define i32 @test2(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32, i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32, i32* %var
+
+  switch i32 %c, label %default [
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+dec:
+; CHECK: call void @decf()
+; CHECK-NOT: call void @decf()
+  call void @decf() noreturn nounwind noduplicate
+  br label %loop_begin
+default:
+  br label %loop_exit
+loop_exit:
+  ret i32 0
+; CHECK: }
+}
+
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the convergent call that is not control-dependent on the unswitch condition.
+
+; CHECK-LABEL: @test3(
+define i32 @test3(i32* %var) {
+  %mem = alloca i32
+  store i32 2, i32* %mem
+  %c = load i32, i32* %mem
+
+  br label %loop_begin
+
+loop_begin:
+
+  %var_val = load i32, i32* %var
+
+; CHECK: call void @conv()
+; CHECK-NOT: call void @conv()
+  call void @conv() convergent
+
+  switch i32 %c, label %default [
+      i32 1, label %inc
+      i32 2, label %dec
+  ]
+
+inc:
+  call void @incf() noreturn nounwind
+  br label %loop_begin
+dec:
+  call void @decf() noreturn nounwind
+  br label %loop_begin
+default:
+  br label %loop_exit
+loop_exit:
+  ret i32 0
+; CHECK: }
+}
+
+; Make sure we don't unswitch, as we can not find an input value %a
+; that will effectively unswitch 0 or 3 out of the loop.
+;
+; CHECK: define void @and_or_i2_as_switch_input(i2
+; CHECK: entry:
+; This is an indication that the loop has NOT been unswitched.
+; CHECK-NOT: icmp
+; CHECK: br
+define void @and_or_i2_as_switch_input(i2 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i2 [ 0, %entry ], [ %inc, %for.inc ]
+  %and = and i2 %a, %i 
+  %or = or i2 %and, %i
+  switch i2 %or, label %sw.default [
+    i2 0, label %sw.bb
+    i2 3, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.default:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i2 %i, 1
+  %cmp = icmp slt i2 %inc, 3 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Make sure we don't unswitch, as we can not find an input value %a
+; that will effectively unswitch true/false out of the loop.
+;
+; CHECK: define void @and_or_i1_as_branch_input(i1
+; CHECK: entry:
+; This is an indication that the loop has NOT been unswitched.
+; CHECK-NOT: icmp
+; CHECK: br
+define void @and_or_i1_as_branch_input(i1 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i1 [ 0, %entry ], [ %inc, %for.inc ]
+  %and = and i1 %a, %i 
+  %or = or i1 %and, %i
+  br i1 %or, label %sw.bb, label %sw.bb1
+
+sw.bb:
+  br label %sw.epilog
+
+sw.bb1:
+  br label %sw.epilog
+
+sw.epilog:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i1 %i, 1
+  %cmp = icmp slt i1 %inc, 1 
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
+declare void @conv() convergent

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/cleanuppad.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; RUN: opt -S -simple-loop-unswitch < %s | FileCheck %s
+; RUN: opt -S -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+define void @f(i32 %doit, i1 %x, i1 %y) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  %tobool = icmp eq i32 %doit, 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  br i1 %x, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  br i1 %tobool, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  br i1 %y, label %for.inc, label %delete.notnull
+
+delete.notnull:                                   ; preds = %if.then
+  invoke void @g()
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %delete.notnull
+  br label %for.inc
+
+lpad:                                             ; preds = %delete.notnull
+  %cp = cleanuppad within none []
+  cleanupret from %cp unwind to caller
+
+for.inc:                                          ; preds = %invoke.cont, %if.then, %for.body
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
+
+; CHECK-LABEL: define void @f(
+; CHECK: cleanuppad within none []
+; CHECK-NOT: cleanuppad
+
+attributes #0 = { ssp uwtable }

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/copy-metadata.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; RUN: opt < %s -simple-loop-unswitch -S | FileCheck %s
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s
+
+; This test checks that the unswitched condition preserves make.implicit metadata.
+define i32 @test(i1 %cond) {
+; CHECK-LABEL: @test(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit, !make.implicit !0
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br i1 %cond, label %continue, label %loop_exit, !make.implicit !0
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %continue
+
+continue:
+  call void @some_func()
+  br label %loop_begin
+; CHECK:       continue:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret
+}
+
+declare void @some_func()
+
+!0 = !{}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/crash.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,67 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+; RUN: opt < %s -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output
+
+define void @test1(i32* %S2) {
+entry:
+	br i1 false, label %list_Length.exit, label %cond_true.i
+cond_true.i:		; preds = %entry
+	ret void
+list_Length.exit:		; preds = %entry
+	br i1 false, label %list_Length.exit9, label %cond_true.i5
+cond_true.i5:		; preds = %list_Length.exit
+	ret void
+list_Length.exit9:		; preds = %list_Length.exit
+	br i1 false, label %bb78, label %return
+bb44:		; preds = %bb78, %cond_next68
+	br i1 %tmp49.not, label %bb62, label %bb62.loopexit
+bb62.loopexit:		; preds = %bb44
+	br label %bb62
+bb62:		; preds = %bb62.loopexit, %bb44
+	br i1 false, label %return.loopexit, label %cond_next68
+cond_next68:		; preds = %bb62
+	br i1 false, label %return.loopexit, label %bb44
+bb78:		; preds = %list_Length.exit9
+	%tmp49.not = icmp eq i32* %S2, null		; <i1> [#uses=1]
+	br label %bb44
+return.loopexit:		; preds = %cond_next68, %bb62
+	%retval.0.ph = phi i32 [ 1, %cond_next68 ], [ 0, %bb62 ]		; <i32> [#uses=1]
+	br label %return
+return:		; preds = %return.loopexit, %list_Length.exit9
+	%retval.0 = phi i32 [ 0, %list_Length.exit9 ], [ %retval.0.ph, %return.loopexit ]		; <i32> [#uses=0]
+	ret void
+}
+
+define void @test2() nounwind {
+entry:
+  br label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+  %and.i13521 = and <4 x i1> undef, undef         ; <<4 x i1>> [#uses=1]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %bb.nph
+  %or.i = select <4 x i1> %and.i13521, <4 x i32> undef, <4 x i32> undef ; <<4 x i32>> [#uses=0]
+  br i1 false, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; PR6879
+define i32* @test3(i32** %p_45, i16 zeroext %p_46, i64 %p_47, i64 %p_48, i16 signext %p_49) nounwind {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond4, %entry
+  br i1 false, label %for.cond4, label %for.end88
+
+for.cond4:                                        ; preds = %for.cond
+  %conv46 = trunc i32 0 to i8                     ; <i8> [#uses=2]
+  %cmp60 = icmp sgt i8 %conv46, 124               ; <i1> [#uses=1]
+  %or.cond = and i1 undef, %cmp60                 ; <i1> [#uses=1]
+  %cond = select i1 %or.cond, i8 %conv46, i8 undef ; <i8> [#uses=0]
+  br label %for.cond
+
+for.end88:                                        ; preds = %for.cond
+  ret i32* undef
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/delete-dead-blocks.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,101 @@
+; RUN: opt < %s -simple-loop-unswitch -enable-nontrivial-unswitch -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=unswitch -enable-nontrivial-unswitch -S 2>&1 | FileCheck %s
+;
+; Checking that (dead) blocks from inner loop are deleted after unswitch.
+;
+declare void @foo()
+
+; CHECK-LABEL: @Test
+define void @Test(i32) {
+entry:
+  br label %outer
+outer:
+  %oi = phi i32 [ 0, %entry ], [ %oinc, %outer_continue]
+  br label %inner
+inner:
+  %ii = phi i32 [ 0, %outer ], [ %iinc, %continue]
+  call void @foo() 
+  switch i32 %0, label %get_out2 [
+    i32 0, label %continue
+    i32 1, label %case1
+    i32 2, label %get_out
+  ]
+;
+; since we unswitch on the above switch, %case1 and %continue blocks
+; become dead in the original loop
+;
+; CHECK-NOT: case1:
+case1:
+  br label %continue
+; CHECK-NOT: {{^}}continue:
+continue:
+  %iinc = add i32 %ii, 1
+  %icmp = icmp eq i32 %ii, 100
+  br i1 %icmp, label %inner, label %outer_continue
+
+outer_continue:
+  %oinc = add i32 %oi, 1
+  %ocmp = icmp eq i32 %oi, 100
+  br i1 %ocmp, label %outer, label %get_out
+
+get_out:
+  ret void
+get_out2:
+  unreachable
+}
+
+;
+; This comes from PR38778
+; CHECK-LABEL: @Test2
+define void @Test2(i32) {
+header:
+  br label %loop
+loop:
+  switch i32 %0, label %continue [
+    i32 -2147483648, label %check
+    i32 98, label %guarded1
+    i32 99, label %guarded2
+  ]
+; CHECK-NOT: {{^}}guarded1:
+guarded1:
+  br i1 undef, label %continue, label %leave
+guarded2:
+  br label %continue
+check:
+  %val = add i32 0, 1
+  br i1 undef, label %continue, label %leave
+continue:
+  br label %loop
+leave:
+  %local = phi i32 [ 0, %guarded1 ], [ %val, %check ]
+  ret void
+}
+
+;
+; Yet another test from PR38778
+;
+; CHECK-LABEL: @Test3
+define void @Test3(i32) {
+header:
+  br label %outer
+outer:
+  %bad_input.i = icmp eq i32 %0, -2147483648
+  br label %inner
+inner:
+  br i1 %bad_input.i, label %overflow, label %switchme
+overflow:
+  br label %continue
+switchme:
+  switch i32 %0, label %continue [
+    i32 88, label %go_out
+    i32 99, label %case2
+  ]
+; CHECK-NOT: {{^}}case2:
+case2:
+  br label %continue
+continue:
+  %local_11_92 = phi i32 [ 0, %switchme ], [ 18, %case2 ], [ 0, %overflow ]
+  br i1 undef, label %outer, label %inner
+go_out:
+  unreachable
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-behavior.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,52 @@
+; RUN: opt -simple-loop-unswitch -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+
+define void @f(i32 %n, i32* %ptr) {
+; CHECK-LABEL: @f(
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+  %iv.inc = add i32 %iv, 1
+  %unswitch_cond_root = icmp ne i32 %iv.inc, 42
+  %us.0 = and i1 %unswitch_cond_root, %unswitch_cond_root
+  %us.1 = and i1 %us.0, %us.0
+  %us.2 = and i1 %us.1, %us.1
+  %us.3 = and i1 %us.2, %us.2
+  %us.4 = and i1 %us.3, %us.3
+  %us.5 = and i1 %us.4, %us.4
+  %us.6 = and i1 %us.5, %us.5
+  %us.7 = and i1 %us.6, %us.6
+  %us.8 = and i1 %us.7, %us.7
+  %us.9 = and i1 %us.8, %us.8
+  %us.10 = and i1 %us.9, %us.9
+  %us.11 = and i1 %us.10, %us.10
+  %us.12 = and i1 %us.11, %us.11
+  %us.13 = and i1 %us.12, %us.12
+  %us.14 = and i1 %us.13, %us.13
+  %us.15 = and i1 %us.14, %us.14
+  %us.16 = and i1 %us.15, %us.15
+  %us.17 = and i1 %us.16, %us.16
+  %us.18 = and i1 %us.17, %us.17
+  %us.19 = and i1 %us.18, %us.18
+  %us.20 = and i1 %us.19, %us.19
+  %us.21 = and i1 %us.20, %us.20
+  %us.22 = and i1 %us.21, %us.21
+  %us.23 = and i1 %us.22, %us.22
+  %us.24 = and i1 %us.23, %us.23
+  %us.25 = and i1 %us.24, %us.24
+  %us.26 = and i1 %us.25, %us.25
+  %us.27 = and i1 %us.26, %us.26
+  %us.28 = and i1 %us.27, %us.27
+  %us.29 = and i1 %us.28, %us.28
+  br i1 %us.29, label %leave, label %be
+
+be:
+  store volatile i32 0, i32* %ptr
+  %becond = icmp ult i32 %iv.inc, %n
+  br i1 %becond, label %leave, label %loop
+
+leave:
+  ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,139 @@
+;
+; There should be just a single copy of each loop when strictest multiplier
+; candidates formula (unscaled candidates == 0) is enforced:
+
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+;
+; When we relax the candidates part of a multiplier formula
+; (unscaled candidates == 4) we start getting some unswitches,
+; which leads to siblings multiplier kicking in.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=4 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE4-DIV1
+;
+; NB: sort -b is essential here and below, otherwise blanks might lead to different
+; order depending on locale.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=4 -unswitch-siblings-toplevel-div=2 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE4-DIV2
+;
+;
+; Get
+;    2^(num conds) == 2^5 = 32
+; loop nests when cost multiplier is disabled:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:	   sort -b -k 1 | FileCheck %s --check-prefixes=LOOP32
+;
+; Single loop nest, not unswitched
+; LOOP1:     Loop at depth 1 containing:
+; LOOP1:     Loop at depth 2 containing:
+; LOOP1:     Loop at depth 3 containing:
+; LOOP1-NOT: Loop at depth {{[0-9]+}} containing:
+;
+; Half unswitched loop nests, with unscaled4 and div1 it gets less depth1 loops unswitched
+; since they have more cost.
+; LOOP-UNSCALE4-DIV1-COUNT-6: Loop at depth 1 containing:
+; LOOP-UNSCALE4-DIV1-COUNT-19: Loop at depth 2 containing:
+; LOOP-UNSCALE4-DIV1-COUNT-29: Loop at depth 3 containing:
+; LOOP-UNSCALE4-DIV1-NOT:      Loop at depth {{[0-9]+}} containing:
+;
+; Half unswitched loop nests, with unscaled4 and div2 it gets more depth1 loops unswitched
+; as div2 kicks in.
+; LOOP-UNSCALE4-DIV2-COUNT-11: Loop at depth 1 containing:
+; LOOP-UNSCALE4-DIV2-COUNT-22: Loop at depth 2 containing:
+; LOOP-UNSCALE4-DIV2-COUNT-29: Loop at depth 3 containing:
+; LOOP-UNSCALE4-DIV2-NOT:      Loop at depth {{[0-9]+}} containing:
+;
+; 32 loop nests, fully unswitched
+; LOOP32-COUNT-32: Loop at depth 1 containing:
+; LOOP32-COUNT-32: Loop at depth 2 containing:
+; LOOP32-COUNT-32: Loop at depth 3 containing:
+; LOOP32-NOT:      Loop at depth {{[0-9]+}} containing:
+
+declare void @bar()
+
+define void @loop_nested3_conds5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) {
+entry:
+  %addr1 = getelementptr i32, i32* %addr, i64 0
+  %addr2 = getelementptr i32, i32* %addr, i64 1
+  %addr3 = getelementptr i32, i32* %addr, i64 2
+  br label %outer
+outer:
+  %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch]
+  %iv1.next = add i32 %iv1, 1
+  ;; skip nontrivial unswitch
+  call void @bar()
+  br label %middle
+middle:
+  %iv2 = phi i32 [0, %outer], [%iv2.next, %middle_latch]
+  %iv2.next = add i32 %iv2, 1
+  ;; skip nontrivial unswitch
+  call void @bar()
+  br label %loop
+loop:
+  %iv3 = phi i32 [0, %middle], [%iv3.next, %loop_latch]
+  %iv3.next = add i32 %iv3, 1
+  ;; skip nontrivial unswitch
+  call void @bar()
+  br i1 %c1, label %loop_next1_left, label %loop_next1_right
+loop_next1_left:
+  br label %loop_next1
+loop_next1_right:
+  br label %loop_next1
+
+loop_next1:
+  br i1 %c2, label %loop_next2_left, label %loop_next2_right
+loop_next2_left:
+  br label %loop_next2
+loop_next2_right:
+  br label %loop_next2
+
+loop_next2:
+  br i1 %c3, label %loop_next3_left, label %loop_next3_right
+loop_next3_left:
+  br label %loop_next3
+loop_next3_right:
+  br label %loop_next3
+
+loop_next3:
+  br i1 %c4, label %loop_next4_left, label %loop_next4_right
+loop_next4_left:
+  br label %loop_next4
+loop_next4_right:
+  br label %loop_next4
+
+loop_next4:
+  br i1 %c5, label %loop_latch_left, label %loop_latch_right
+loop_latch_left:
+  br label %loop_latch
+loop_latch_right:
+  br label %loop_latch
+
+loop_latch:
+  store volatile i32 0, i32* %addr1
+  %test_loop = icmp slt i32 %iv3, 50
+  br i1 %test_loop, label %loop, label %middle_latch
+middle_latch:
+  store volatile i32 0, i32* %addr2
+  %test_middle = icmp slt i32 %iv2, 50
+  br i1 %test_middle, label %middle, label %outer_latch
+outer_latch:
+  store volatile i32 0, i32* %addr3
+  %test_outer = icmp slt i32 %iv1, 50
+  br i1 %test_outer, label %outer, label %exit
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested2.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch-nested2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,149 @@
+;
+; Here all the branches we unswitch are exiting from the inner loop.
+; That means we should not be getting exponential behavior on inner-loop
+; unswitch. In fact there should be just a single version of inner-loop,
+; with possibly some outer loop copies.
+;
+; There should be just a single copy of each loop when strictest multiplier
+; candidates formula (unscaled candidates == 0) is enforced:
+
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+;
+; When we relax the candidates part of a multiplier formula
+; (unscaled candidates == 3) we start getting some unswitches in outer loops,
+; which leads to siblings multiplier kicking in.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV1
+;
+; NB: sort -b is essential here and below, otherwise blanks might lead to different
+; order depending on locale.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=3 -unswitch-siblings-toplevel-div=2 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-UNSCALE3-DIV2
+;
+; With disabled cost-multiplier we get maximal possible amount of unswitches.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:	   sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-MAX
+;
+; Single loop nest, not unswitched
+; LOOP1:     Loop at depth 1 containing:
+; LOOP1-NOT:  Loop at depth 1 containing:
+; LOOP1:     Loop at depth 2 containing:
+; LOOP1-NOT:  Loop at depth 2 containing:
+; LOOP1:     Loop at depth 3 containing:
+; LOOP1-NOT:  Loop at depth 3 containing:
+;
+; Half unswitched loop nests, with unscaled3 and div1 it gets less depth1 loops unswitched
+; since they have more cost.
+; LOOP-UNSCALE3-DIV1-COUNT-4: Loop at depth 1 containing:
+; LOOP-UNSCALE3-DIV1-NOT:      Loop at depth 1 containing:
+; LOOP-UNSCALE3-DIV1-COUNT-1: Loop at depth 2 containing:
+; LOOP-UNSCALE3-DIV1-NOT:      Loop at depth 2 containing:
+; LOOP-UNSCALE3-DIV1-COUNT-1: Loop at depth 3 containing:
+; LOOP-UNSCALE3-DIV1-NOT:      Loop at depth 3 containing:
+;
+; Half unswitched loop nests, with unscaled3 and div2 it gets more depth1 loops unswitched
+; as div2 kicks in.
+; LOOP-UNSCALE3-DIV2-COUNT-6: Loop at depth 1 containing:
+; LOOP-UNSCALE3-DIV2-NOT:      Loop at depth 1 containing:
+; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 2 containing:
+; LOOP-UNSCALE3-DIV2-NOT:      Loop at depth 2 containing:
+; LOOP-UNSCALE3-DIV2-COUNT-1: Loop at depth 3 containing:
+; LOOP-UNSCALE3-DIV2-NOT:      Loop at depth 3 containing:
+;
+; Maximally unswitched (copy of the outer loop per each condition)
+; LOOP-MAX-COUNT-6: Loop at depth 1 containing:
+; LOOP-MAX-NOT:      Loop at depth 1 containing:
+; LOOP-MAX-COUNT-1: Loop at depth 2 containing:
+; LOOP-MAX-NOT:      Loop at depth 2 containing:
+; LOOP-MAX-COUNT-1: Loop at depth 3 containing:
+; LOOP-MAX-NOT:      Loop at depth 3 containing:
+
+declare void @bar()
+
+define void @loop_nested3_conds5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) {
+entry:
+  %addr1 = getelementptr i32, i32* %addr, i64 0
+  %addr2 = getelementptr i32, i32* %addr, i64 1
+  %addr3 = getelementptr i32, i32* %addr, i64 2
+  br label %outer
+outer:
+  %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch]
+  %iv1.next = add i32 %iv1, 1
+  ;; skip nontrivial unswitch
+  call void @bar()
+  br label %middle
+middle:
+  %iv2 = phi i32 [0, %outer], [%iv2.next, %middle_latch]
+  %iv2.next = add i32 %iv2, 1
+  ;; skip nontrivial unswitch
+  call void @bar()
+  br label %loop
+loop:
+  %iv3 = phi i32 [0, %middle], [%iv3.next, %loop_latch]
+  %iv3.next = add i32 %iv3, 1
+  ;; skip nontrivial unswitch
+  call void @bar()
+  br i1 %c1, label %loop_next1_left, label %outer_latch
+loop_next1_left:
+  br label %loop_next1
+loop_next1_right:
+  br label %loop_next1
+
+loop_next1:
+  br i1 %c2, label %loop_next2_left, label %outer_latch
+loop_next2_left:
+  br label %loop_next2
+loop_next2_right:
+  br label %loop_next2
+
+loop_next2:
+  br i1 %c3, label %loop_next3_left, label %outer_latch
+loop_next3_left:
+  br label %loop_next3
+loop_next3_right:
+  br label %loop_next3
+
+loop_next3:
+  br i1 %c4, label %loop_next4_left, label %outer_latch
+loop_next4_left:
+  br label %loop_next4
+loop_next4_right:
+  br label %loop_next4
+
+loop_next4:
+  br i1 %c5, label %loop_latch_left, label %outer_latch
+loop_latch_left:
+  br label %loop_latch
+loop_latch_right:
+  br label %loop_latch
+
+loop_latch:
+  store volatile i32 0, i32* %addr1
+  %test_loop = icmp slt i32 %iv3, 50
+  br i1 %test_loop, label %loop, label %middle_latch
+middle_latch:
+  store volatile i32 0, i32* %addr2
+  %test_middle = icmp slt i32 %iv2, 50
+  br i1 %test_middle, label %middle, label %outer_latch
+outer_latch:
+  store volatile i32 0, i32* %addr3
+  %test_outer = icmp slt i32 %iv1, 50
+  br i1 %test_outer, label %outer, label %exit
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,80 @@
+;
+; There should be just a single copy of loop when strictest multiplier candidates
+; formula (unscaled candidates == 0) is enforced:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=8 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) we should allow
+; some unswitches to happen until siblings multiplier starts kicking in:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP5
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) and with relaxed
+; siblings multiplier for top-level loops (toplevel-div == 8) we should get
+;    2^(num conds) == 2^5 == 32
+; copies of the loop:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32
+;
+; Similarly get
+;    2^(num conds) == 2^5 == 32
+; copies of the loop when cost multiplier is disabled:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP32
+;
+;
+; Single loop, not unswitched
+; LOOP1:     Loop at depth 1 containing:
+; LOOP1-NOT: Loop at depth 1 containing:
+
+; 5 loops, unswitched 4 times
+; LOOP5-COUNT-5: Loop at depth 1 containing:
+; LOOP5-NOT:     Loop at depth 1 containing:
+
+; 32 loops, fully unswitched
+; LOOP32-COUNT-32: Loop at depth 1 containing:
+; LOOP32-NOT:     Loop at depth 1 containing:
+
+define void @loop_simple5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) { ; %c1..%c5 are plain arguments, so every branch on them is loop-invariant
+entry:
+  br label %loop
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop_latch]
+  %iv.next = add i32 %iv, 1
+  br i1 %c1, label %loop_next1, label %loop_next1_right ; neither successor leaves the loop
+loop_next1_right: ; empty side block that rejoins %loop_next1
+  br label %loop_next1
+loop_next1:
+  br i1 %c2, label %loop_next2, label %loop_next2_right ; same shape as the %c1 branch
+loop_next2_right:
+  br label %loop_next2
+loop_next2:
+  br i1 %c3, label %loop_next3, label %loop_next3_right
+loop_next3_right:
+  br label %loop_next3
+loop_next3:
+  br i1 %c4, label %loop_next4, label %loop_next4_right
+loop_next4_right:
+  br label %loop_next4
+loop_next4:
+  br i1 %c5, label %loop_latch, label %loop_latch_right ; fifth and last in-loop branch on an argument
+loop_latch_right:
+  br label %loop_latch
+loop_latch:
+  store volatile i32 0, i32* %addr
+  %test_loop = icmp slt i32 %iv, 50 ; compares %iv (not %iv.next); this is the only exit test
+  br i1 %test_loop, label %loop, label %exit
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-nontrivial-unswitch2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+;
+; Here all the branches are exiting ones. Checking that we don't have
+; exponential behavior with any kind of controlling heuristics here.
+;
+; There we should have just a single loop.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=8 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+;
+; Single loop, not unswitched
+; LOOP1:     Loop at depth 1 containing:
+; LOOP1-NOT: Loop at depth 1 containing:
+
+declare void @bar()
+
+define void @loop_simple5(i32* %addr, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) { ; %c1..%c5 are plain arguments, never redefined in the loop
+entry:
+  br label %loop
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop_latch]
+  %iv.next = add i32 %iv, 1
+  ;; disabling trivial unswitch
+  call void @bar()
+  br i1 %c1, label %loop_next1, label %exit ; false edge leaves the loop
+loop_next1:
+  br i1 %c2, label %loop_next2, label %exit ; false edge leaves the loop
+loop_next2:
+  br i1 %c3, label %loop_next3, label %exit ; false edge leaves the loop
+loop_next3:
+  br i1 %c4, label %loop_next4, label %exit ; false edge leaves the loop
+loop_next4:
+  br i1 %c5, label %loop_latch, label %exit ; false edge leaves the loop
+loop_latch:
+  store volatile i32 0, i32* %addr
+  %test_loop = icmp slt i32 %iv, 50 ; compares %iv (not %iv.next)
+  br i1 %test_loop, label %loop, label %exit
+exit: ; shared target of all five argument branches and of the latch
+  ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/exponential-switch-unswitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,118 @@
+;
+; Here we have 5-way unswitchable switch with each successor also having an unswitchable
+; exiting branch in it. If we start unswitching those branches we start duplicating the
+; whole switch. This can easily lead to exponential behavior w/o proper control.
+; On a real-life testcase there was 16-way switch and that took forever to compile w/o
+; a cost control.
+;
+;
+; When we use the strictest multiplier candidates formula (unscaled candidates == 0)
+; we should be getting just a single loop.
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1
+;
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) we should allow
+; some unswitches to happen until siblings multiplier starts kicking in:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-RELAX
+;
+; With relaxed candidates multiplier (unscaled candidates == 8) and with relaxed
+; siblings multiplier for top-level loops (toplevel-div == 8) we should get
+; considerably more copies of the loop (especially top-level ones).
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=true \
+; RUN:     -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-RELAX2
+;
+; We get hundreds of copies of the loop when cost multiplier is disabled:
+;
+; RUN: opt < %s -enable-nontrivial-unswitch -enable-unswitch-cost-multiplier=false \
+; RUN:     -passes='loop(unswitch),print<loops>' -disable-output 2>&1 | \
+; RUN:     sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-MAX
+;
+
+; Single loop nest, not unswitched
+; LOOP1:     Loop at depth 1 containing:
+; LOOP1-NOT: Loop at depth 1 containing:
+; LOOP1:     Loop at depth 2 containing:
+; LOOP1-NOT: Loop at depth 2 containing:
+;
+; Somewhat relaxed restrictions on candidates:
+; LOOP-RELAX-COUNT-5:     Loop at depth 1 containing:
+; LOOP-RELAX-NOT: Loop at depth 1 containing:
+; LOOP-RELAX-COUNT-32:     Loop at depth 2 containing:
+; LOOP-RELAX-NOT: Loop at depth 2 containing:
+;
+; Even more relaxed restrictions on candidates and siblings.
+; LOOP-RELAX2-COUNT-11:     Loop at depth 1 containing:
+; LOOP-RELAX2-NOT: Loop at depth 1 containing:
+; LOOP-RELAX2-COUNT-40:     Loop at depth 2 containing:
+; LOOP-RELAX2-NOT: Loop at depth 2 containing:
+;
+; Unswitched as much as it could (with multiplier disabled).
+; LOOP-MAX-COUNT-56:     Loop at depth 1 containing:
+; LOOP-MAX-NOT: Loop at depth 1 containing:
+; LOOP-MAX-COUNT-111:     Loop at depth 2 containing:
+; LOOP-MAX-NOT: Loop at depth 2 containing:
+
+define i32 @loop_switch(i32* %addr, i32 %c1, i32 %c2) { ; two-deep loop nest; the switch and all case branches depend only on arguments
+entry:
+  %addr1 = getelementptr i32, i32* %addr, i64 0 ; stored to in %inner_latch
+  %addr2 = getelementptr i32, i32* %addr, i64 1 ; stored to in %outer_latch
+  %check0 = icmp eq i32 %c2, 0 ; %check0..%check4 are computed once here, from %c2 only
+  %check1 = icmp eq i32 %c2, 31
+  %check2 = icmp eq i32 %c2, 32
+  %check3 = icmp eq i32 %c2, 33
+  %check4 = icmp eq i32 %c2, 34
+  br label %outer_loop
+
+outer_loop:
+  %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch]
+  %iv1.next = add i32 %iv1, 1
+  br label %inner_loop
+inner_loop:
+  %iv2 = phi i32 [0, %outer_loop], [%iv2.next, %inner_latch]
+  %iv2.next = add i32 %iv2, 1
+  switch i32 %c1, label %inner_latch [ ; five cases plus a default that goes straight to the inner latch
+    i32 0, label %case0
+    i32 1, label %case1
+    i32 2, label %case2
+    i32 3, label %case3
+    i32 4, label %case4
+  ]
+
+case4: ; each case block either jumps to %exit (leaving both loops) or continues to %inner_latch
+  br i1 %check4, label %exit, label %inner_latch
+case3:
+  br i1 %check3, label %exit, label %inner_latch
+case2:
+  br i1 %check2, label %exit, label %inner_latch
+case1:
+  br i1 %check1, label %exit, label %inner_latch
+case0:
+  br i1 %check0, label %exit, label %inner_latch
+
+inner_latch:
+  store volatile i32 0, i32* %addr1
+  %test_inner = icmp slt i32 %iv2, 50
+  br i1 %test_inner, label %inner_loop, label %outer_latch
+
+outer_latch:
+  store volatile i32 0, i32* %addr2
+  %test_outer = icmp slt i32 %iv1, 50
+  br i1 %test_outer, label %outer_loop, label %exit
+
+exit:                                            ; preds = %outer_latch, %case0, %case1, %case2, %case3, %case4
+  ret i32 1
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+; PR38283
+; PR38737
+define void @f1() { ; the RUN line uses -disable-output, so this only has to get through the pass without failing
+for.cond1thread-pre-split.lr.ph.lr.ph:
+  %tobool4 = icmp eq i16 undef, 0 ; defined in the entry block, before either cycle below
+  br label %for.cond1thread-pre-split
+
+for.cond1thread-pre-split:                        ; preds = %if.end, %for.cond1thread-pre-split.lr.ph.lr.ph
+  %tobool3 = icmp eq i16 undef, 0 ; defined outside the %for.body2 cycle but inside the cycle through %if.end
+  br label %for.body2
+
+for.body2:                                        ; preds = %if.end6, %for.cond1thread-pre-split
+  br i1 %tobool3, label %if.end, label %for.end
+
+if.end:                                           ; preds = %for.body2
+  br i1 %tobool4, label %if.end6, label %for.cond1thread-pre-split
+
+if.end6:                                          ; preds = %if.end
+  br i1 undef, label %for.body2, label %for.end
+
+for.end:                                          ; preds = %if.end6, %for.body2
+  ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+; PR38283
+; PR38737
+define void @Test(i32) { ; the RUN line uses -disable-output, so this only has to get through the pass without failing
+entry:
+  %trunc = trunc i32 %0 to i3 ; switch operand, computed once from the unnamed argument
+  br label %outer
+outer:
+  br label %inner
+inner:
+  switch i3 %trunc, label %crit_edge [ ; case 2 goes back to %outer via %break, case 1 returns via %loopexit
+    i3 2, label %break
+    i3 1, label %loopexit
+  ]
+crit_edge:
+  br i1 true, label %loopexit, label %inner ; constant-true condition: the edge back to %inner is never taken
+loopexit:
+  ret void
+break:
+  br label %outer
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+; PR38283
+; PR38737
+declare void @func_1()
+
+define void @func_9(i32 signext %arg) { ; the RUN line uses -disable-output, so this only has to get through the pass without failing
+bb:
+  br label %bb5
+bb5:                                              ; preds = %bb24, %bb
+  %tmp3.0 = phi i32 [ undef, %bb ], [ %tmp29, %bb24 ]
+  %tmp11 = icmp eq i32 %arg, 0
+  %tmp15 = icmp eq i32 %tmp3.0, 0
+  %spec.select = select i1 %tmp15, i32 0, i32 49
+  %tmp1.2 = select i1 %tmp11, i32 %spec.select, i32 9
+  %trunc = trunc i32 %tmp1.2 to i6 ; switch operand: defined here in %bb5, before the %bb9/%bb19 cycle is entered
+  br label %bb9
+
+bb9:                                              ; preds = %bb5, %bb19
+  %tmp2.03 = phi i32 [ 0, %bb5 ], [ %tmp21, %bb19 ]
+  switch i6 %trunc, label %bb24 [ ; default returns to %bb5 via %bb24; case 0 stays in the %bb9/%bb19 cycle
+    i6 0, label %bb19
+    i6 -15, label %bb22
+  ]
+
+bb19:                                             ; preds = %bb9
+  %tmp21 = add nuw nsw i32 %tmp2.03, 1
+  %tmp8 = icmp eq i32 %tmp21, 25
+  br i1 %tmp8, label %bb22, label %bb9
+
+bb22:                                             ; preds = %bb19, %bb9
+  unreachable
+
+bb24:                                             ; preds = %bb9
+  %tmp29 = or i32 %tmp3.0, 1
+  br label %bb5
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/guards.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/guards.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/guards.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/guards.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,239 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -simple-loop-unswitch-guards -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -simple-loop-unswitch-guards -S < %s | FileCheck %s
+; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -simple-loop-unswitch-guards -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test_simple_case(i1 %cond, i32 %N) { ; single-block loop containing one guard on an argument
+; CHECK-LABEL: @test_simple_case(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_US:%.*]]
+; CHECK:       loop.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US]]
+; CHECK:       guarded.us:
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+;
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] ; guard condition is the %cond argument, the same value on every iteration
+  %iv.next = add i32 %iv, 1
+  %loop.cond = icmp slt i32 %iv.next, %N
+  br i1 %loop.cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) { ; single-block loop with two back-to-back guards on different arguments
+; CHECK-LABEL: @test_two_guards(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[ENTRY_SPLIT_US_SPLIT_US:%.*]], label [[ENTRY_SPLIT_US_SPLIT:%.*]]
+; CHECK:       entry.split.us.split.us:
+; CHECK-NEXT:    br label [[LOOP_US_US:%.*]]
+; CHECK:       loop.us.us:
+; CHECK-NEXT:    [[IV_US_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US_SPLIT_US]] ], [ [[IV_NEXT_US_US:%.*]], [[GUARDED_US2:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US_US:%.*]]
+; CHECK:       guarded.us.us:
+; CHECK-NEXT:    br label [[GUARDED_US2]]
+; CHECK:       guarded.us2:
+; CHECK-NEXT:    [[IV_NEXT_US_US]] = add i32 [[IV_US_US]], 1
+; CHECK-NEXT:    [[LOOP_COND_US_US:%.*]] = icmp slt i32 [[IV_NEXT_US_US]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[LOOP_COND_US_US]], label [[LOOP_US_US]], label [[EXIT_SPLIT_US_SPLIT_US:%.*]]
+; CHECK:       deopt1:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ] ; first guard: condition is the %cond1 argument
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ] ; second guard: condition is the %cond2 argument
+  %iv.next = add i32 %iv, 1
+  %loop.cond = icmp slt i32 %iv.next, %N
+  br i1 %loop.cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_conditional_guards(i1 %cond, i32 %N) { ; the guard sits in a block that is only reached when %iv == 123
+; CHECK-LABEL: @test_conditional_guards(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_US:%.*]]
+; CHECK:       loop.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[BACKEDGE_US:%.*]] ]
+; CHECK-NEXT:    [[CONDITION_US:%.*]] = icmp eq i32 [[IV_US]], 123
+; CHECK-NEXT:    br i1 [[CONDITION_US]], label [[GUARD_US:%.*]], label [[BACKEDGE_US]]
+; CHECK:       guard.us:
+; CHECK-NEXT:    br label [[GUARDED_US:%.*]]
+; CHECK:       backedge.us:
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[CONDITION:%.*]] = icmp eq i32 [[IV]], 123
+; CHECK-NEXT:    br i1 [[CONDITION]], label [[GUARD:%.*]], label [[BACKEDGE]]
+; CHECK:       guard:
+; CHECK-NEXT:    br label [[DEOPT:%.*]]
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:       backedge:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label %loop, label [[EXIT_SPLIT:%.*]]
+;
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %condition = icmp eq i32 %iv, 123 ; depends on %iv, so this branch itself varies per iteration
+  br i1 %condition, label %guard, label %backedge
+
+guard:
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] ; guard condition is the %cond argument
+  br label %backedge
+
+backedge:
+  %iv.next = add i32 %iv, 1
+  %loop.cond = icmp slt i32 %iv.next, %N
+  br i1 %loop.cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_nested_loop(i1 %cond, i32 %N) { ; guard on an argument inside the inner loop of a two-deep nest
+; CHECK-LABEL: @test_nested_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[ENTRY_SPLIT:%.*]], label [[OUTER_LOOP_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OUTER_LOOP:%.*]]
+; CHECK:       outer_loop:
+; CHECK-NEXT:    br label [[OUTER_LOOP_SPLIT_US:%.*]]
+; CHECK:       outer_loop.split.us:
+; CHECK-NEXT:    br label [[LOOP_US:%.*]]
+; CHECK:       loop.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[OUTER_LOOP_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US]]
+; CHECK:       guarded.us:
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[OUTER_BACKEDGE_SPLIT_US:%.*]]
+; CHECK:       outer_backedge.split.us:
+; CHECK-NEXT:    br label [[OUTER_BACKEDGE:%.*]]
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:       outer_backedge:
+; CHECK-NEXT:    br i1 false, label [[OUTER_LOOP]], label [[EXIT:%.*]]
+;
+
+entry:
+  br label %outer_loop
+
+outer_loop:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %outer_loop ], [ %iv.next, %loop ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] ; guard condition is the %cond argument, invariant in both loops
+  %iv.next = add i32 %iv, 1
+  %loop.cond = icmp slt i32 %iv.next, %N
+  br i1 %loop.cond, label %loop, label %outer_backedge
+
+outer_backedge:
+  br i1 undef, label %outer_loop, label %exit ; undef here; the expected output above shows this branch with a constant false condition
+
+exit:
+  ret void
+}
+
+define void @test_sibling_loops(i1 %cond1, i1 %cond2, i32 %N) { ; two consecutive loops, each guarded on a different argument
+; CHECK-LABEL: @test_sibling_loops(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:         [[IV1_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV1_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US]]
+; CHECK:         call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:         [[IV2_US:%.*]] = phi i32 [ 0, [[BETWEEN:%.*]] ], [ [[IV1_NEXT_US2:%.*]], [[GUARDED_US2:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US2]]
+; CHECK:         call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+;
+
+entry:
+  br label %loop1
+
+loop1:
+  %iv1 = phi i32 [ 0, %entry ], [ %iv1.next, %loop1 ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ] ; first loop guards on %cond1
+  %iv1.next = add i32 %iv1, 1
+  %loop1.cond = icmp slt i32 %iv1.next, %N
+  br i1 %loop1.cond, label %loop1, label %between
+
+between: ; exit of the first loop and entry to the second
+  br label %loop2
+
+loop2:
+  %iv2 = phi i32 [ 0, %between ], [ %iv2.next, %loop2 ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ] ; second loop guards on %cond2
+  %iv2.next = add i32 %iv2, 1
+  %loop2.cond = icmp slt i32 %iv2.next, %N
+  br i1 %loop2.cond, label %loop2, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we don't do anything because of cleanuppad.
+; CHECK-LABEL: @test_cleanuppad(
+; CHECK:       call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+; CHECK-NOT:   call void (i1, ...) @llvm.experimental.guard(
+define void @test_cleanuppad(i1 %cond, i32 %N) personality i32 (...)* @__CxxFrameHandler3 { ; negative test: the guard on %cond is expected to stay in place
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] ; guard condition is the %cond argument
+  %iv.next = add i32 %iv, 1
+  invoke void @may_throw(i32 %iv) to label %loop unwind label %exit ; normal edge is the backedge; the unwind edge is the only way out of the loop
+
+exit:
+  %cp = cleanuppad within none [] ; the loop's exit block is a cleanup pad
+  cleanupret from %cp unwind to caller
+
+}
+
+declare void @may_throw(i32 %i)
+declare i32 @__CxxFrameHandler3(...)

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/infinite-loop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,65 @@
+; REQUIRES: asserts
+; RUN: opt -simple-loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -simple-loop-unswitch -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+; PR5373
+
+; Loop unswitching shouldn't trivially unswitch the true case of condition %a
+; in the code here because it leads to an infinite loop. While this doesn't
+; contain any instructions with side effects, it's still a kind of side effect.
+; It can trivially unswitch on the false case of condition %a though.
+
+; STATS: 2 simple-loop-unswitch - Number of branches unswitched
+; STATS: 2 simple-loop-unswitch - Number of unswitches that are trivial
+
+; CHECK-LABEL: @func_16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0
+
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1
+
+; CHECK: entry.split.split:
+; CHECK-NEXT: br label %for.body
+
+; CHECK: for.body:
+; CHECK-NEXT: br label %cond.end
+
+; CHECK: cond.end:
+; CHECK-NEXT: br label %for.body
+
+; CHECK: abort0:
+; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
+; CHECK-NEXT: unreachable
+
+; CHECK: abort1:
+; CHECK-NEXT: call void @end1() [[NOR_NUW]]
+; CHECK-NEXT: unreachable
+
+; CHECK: }
+
+define void @func_16(i1 %a, i1 %b) nounwind { ; the loop has no exit other than the two abort edges
+entry:
+  br label %for.body
+
+for.body:
+  br i1 %a, label %cond.end, label %abort0 ; false edge on the %a argument leaves the loop
+
+cond.end:
+  br i1 %b, label %for.body, label %abort1 ; true edge is the backedge; false edge on %b leaves the loop
+
+abort0:
+  call void @end0() noreturn nounwind
+  unreachable
+
+abort1:
+  call void @end1() noreturn nounwind
+  unreachable
+}
+
+declare void @end0() noreturn
+declare void @end1() noreturn
+
+; CHECK: attributes #0 = { nounwind }
+; CHECK: attributes #1 = { noreturn }
+; CHECK: attributes [[NOR_NUW]] = { noreturn nounwind }

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/msan.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/msan.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/msan.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/msan.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,142 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+
+declare void @unknown()
+declare void @unknown2()
+
+@y = global i64 0, align 8
+
+; The following is approximately:
+; void f(bool *x) {
+;   for (int i = 0; i < 1; ++i) {
+;     if (*x) {
+;       if (y)
+;         unknown();
+;       else
+;         break;
+;     }
+;   }
+; }
+; With MemorySanitizer, the loop can not be unswitched on "y", because "y" could
+; be uninitialized when x == false.
+; Test that the branch on "y" is inside the loop (after the first unconditional
+; branch).
+
+define void @may_not_execute_trivial(i1* %x) sanitize_memory { ; carries the sanitize_memory attribute
+; CHECK-LABEL: @may_not_execute_trivial(
+entry:
+  %y = load i64, i64* @y, align 8
+  %y.cmp = icmp eq i64 %y, 0 ; computed before the loop, used only in %if.then
+  br label %for.body
+; CHECK: %[[Y:.*]] = load i64, i64* @y
+; CHECK: %[[YCMP:.*]] = icmp eq i64 %[[Y]], 0
+; CHECK-NOT: br i1
+; CHECK: br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %x.load = load i1, i1* %x ; reloaded on every iteration
+  br i1 %x.load, label %for.inc, label %if.then ; %if.then is reached only when the loaded value is false
+; CHECK: %[[XLOAD:.*]] = load i1, i1* %x
+; CHECK: br i1 %[[XLOAD]]
+
+if.then:
+  br i1 %y.cmp, label %for.end, label %if.then4 ; the branch on y that is expected to remain inside the loop
+; CHECK: br i1 %[[YCMP]]
+
+if.then4:
+  call void @unknown()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp slt i32 %inc, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+
+; The same as above, but "y" is a function parameter instead of a global.
+; This shows that it is not enough to suppress hoisting of load instructions,
+; the actual problem is in the speculative branching.
+
+define void @may_not_execute2_trivial(i1* %x, i1 %y) sanitize_memory { ; same shape as the global-based variant, with %y passed as an argument
+; CHECK-LABEL: @may_not_execute2_trivial(
+entry:
+  br label %for.body
+; CHECK-NOT: br i1
+; CHECK: br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %x.load = load i1, i1* %x ; reloaded on every iteration
+  br i1 %x.load, label %for.inc, label %if.then ; %if.then is reached only when the loaded value is false
+; CHECK: %[[XLOAD:.*]] = load i1, i1* %x
+; CHECK: br i1 %[[XLOAD]]
+
+if.then:
+  br i1 %y, label %for.end, label %if.then4 ; the branch on %y that is expected to remain inside the loop
+; CHECK: br i1 %y
+
+if.then4:
+  call void @unknown()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp slt i32 %inc, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+
+; The following is approximately:
+; void f() {
+;   for (int i = 0; i < 1; ++i) {
+;     if (y)
+;       unknown();
+;     else
+;       break;
+;   }
+; }
+; "if (y)" is guaranteed to execute; the loop can be unswitched.
+
+define void @must_execute_trivial() sanitize_memory { ; positive case: the branch on y is the first thing the loop header does
+; CHECK-LABEL: @must_execute_trivial(
+entry:
+  %y = load i64, i64* @y, align 8
+  %y.cmp = icmp eq i64 %y, 0 ; computed before the loop
+  br label %for.body
+; CHECK:   %[[Y:.*]] = load i64, i64* @y
+; CHECK:   %[[YCMP:.*]] = icmp eq i64 %[[Y]], 0
+; CHECK:   br i1 %[[YCMP]], label %[[EXIT_SPLIT:.*]], label %[[PH:.*]]
+;
+; CHECK: [[PH]]:
+; CHECK:   br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  br i1 %y.cmp, label %for.end, label %if.then4 ; no other branch precedes this one in the loop
+; CHECK: br label %if.then4
+
+if.then4:
+  call void @unknown()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp slt i32 %inc, 1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+; CHECK: for.end:
+; CHECK:   br label %[[EXIT_SPLIT]]
+;
+; CHECK: [[EXIT_SPLIT]]:
+; CHECK:   ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-cost.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,502 @@
+; Specifically exercise the cost modeling for non-trivial loop unswitching.
+;
+; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -unswitch-threshold=5 -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -unswitch-threshold=5 -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -unswitch-threshold=5 -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+
+declare void @a()
+declare void @b()
+declare void @x()
+
+; First establish enough code size in the duplicated 'loop_begin' block to
+; suppress unswitching.
+define void @test_no_unswitch(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT:     br i1 %cond
+
+loop_begin:
+  call void @x()
+  call void @x()
+  call void @x()
+  call void @x()
+  br i1 %cond, label %loop_a, label %loop_b
+; CHECK:       loop_begin:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+  call void @a()
+  br label %loop_latch
+
+loop_b:
+  call void @b()
+  br label %loop_latch
+
+loop_latch:
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret void
+}
+
+; Now check that the smaller formulation of 'loop_begin' does in fact unswitch
+; with our low threshold.
+define void @test_unswitch(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  call void @x()
+  br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+  call void @a()
+  br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    call void @a()
+; CHECK-NEXT:    br label %loop_latch.us
+;
+; CHECK:       loop_latch.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  call void @b()
+  br label %loop_latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    br label %loop_latch
+;
+; CHECK:       loop_latch:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_latch:
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret void
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret void
+}
+
+; Check that even with large amounts of code on either side of the unswitched
+; branch, if that code would be kept in only one of the unswitched clones it
+; doesn't contribute to the cost.
+define void @test_unswitch_non_dup_code(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch_non_dup_code(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  call void @x()
+  br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+  call void @a()
+  call void @a()
+  call void @a()
+  call void @a()
+  br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    call void @a()
+; CHECK-NEXT:    call void @a()
+; CHECK-NEXT:    call void @a()
+; CHECK-NEXT:    call void @a()
+; CHECK-NEXT:    br label %loop_latch.us
+;
+; CHECK:       loop_latch.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  call void @b()
+  call void @b()
+  call void @b()
+  call void @b()
+  br label %loop_latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    br label %loop_latch
+;
+; CHECK:       loop_latch:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_latch:
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret void
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret void
+}
+
+; Much like with non-duplicated code directly in the successor, we also won't
+; duplicate even interesting CFGs.
+define void @test_unswitch_non_dup_code_in_cfg(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch_non_dup_code_in_cfg(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  call void @x()
+  br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %loop_a_a, label %loop_a_b
+
+loop_a_a:
+  call void @a()
+  br label %loop_latch
+
+loop_a_b:
+  call void @a()
+  br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_a_a.us, label %loop_a_b.us
+;
+; CHECK:       loop_a_b.us:
+; CHECK-NEXT:    call void @a()
+; CHECK-NEXT:    br label %loop_latch.us
+;
+; CHECK:       loop_a_a.us:
+; CHECK-NEXT:    call void @a()
+; CHECK-NEXT:    br label %loop_latch.us
+;
+; CHECK:       loop_latch.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_b_a, label %loop_b_b
+
+loop_b_a:
+  call void @b()
+  br label %loop_latch
+
+loop_b_b:
+  call void @b()
+  br label %loop_latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_b_a, label %loop_b_b
+;
+; CHECK:       loop_b_a:
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    br label %loop_latch
+;
+; CHECK:       loop_b_b:
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    br label %loop_latch
+;
+; CHECK:       loop_latch:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_latch:
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret void
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret void
+}
+
+; Check that even if there is *some* non-duplicated code on one side of an
+; unswitch, we still count any other code in the loop that will in fact have to
+; be duplicated.
+define void @test_no_unswitch_non_dup_code(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch_non_dup_code(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT:     br i1 %cond
+
+loop_begin:
+  call void @x()
+  br i1 %cond, label %loop_a, label %loop_b
+; CHECK:       loop_begin:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %loop_a_a, label %loop_a_b
+
+loop_a_a:
+  call void @a()
+  br label %loop_latch
+
+loop_a_b:
+  call void @a()
+  br label %loop_latch
+
+loop_b:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_b_a, label %loop_b_b
+
+loop_b_a:
+  call void @b()
+  br label %loop_latch
+
+loop_b_b:
+  call void @b()
+  br label %loop_latch
+
+loop_latch:
+  call void @x()
+  call void @x()
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret void
+}
+
+; Check that we still unswitch when the exit block contains lots of code, even
+; though we do clone the exit block as part of unswitching. This should work
+; because we should split the exit block before anything inside it.
+define void @test_unswitch_large_exit(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch_large_exit(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  call void @x()
+  br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+  call void @a()
+  br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    call void @a()
+; CHECK-NEXT:    br label %loop_latch.us
+;
+; CHECK:       loop_latch.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  call void @b()
+  br label %loop_latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    br label %loop_latch
+;
+; CHECK:       loop_latch:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_latch:
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  call void @x()
+  call void @x()
+  call void @x()
+  call void @x()
+  ret void
+; CHECK:       loop_exit:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    ret void
+}
+
+; Check that we handle a dedicated exit edge unswitch which is still
+; non-trivial and has lots of code in the exit.
+define void @test_unswitch_dedicated_exiting(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_unswitch_dedicated_exiting(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  call void @x()
+  br i1 %cond, label %loop_a, label %loop_b_exit
+
+loop_a:
+  call void @a()
+  br label %loop_latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    call void @a()
+; CHECK-NEXT:    br label %loop_latch.us
+;
+; CHECK:       loop_latch.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b_exit:
+  call void @b()
+  call void @b()
+  call void @b()
+  call void @b()
+  ret void
+; The 'loop_b_exit' unswitched exit path.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    call void @x()
+; CHECK-NEXT:    br label %loop_b_exit
+;
+; CHECK:       loop_b_exit:
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    ret void
+
+loop_latch:
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret void
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,4216 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s
+
+declare i32 @a()
+declare i32 @b()
+declare i32 @c()
+declare i32 @d()
+
+declare void @sink1(i32)
+declare void @sink2(i32)
+
+declare i1 @cond()
+declare i32 @cond.i32()
+
+; Negative test: we cannot unswitch convergent calls.
+define void @test_no_unswitch_convergent(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch_convergent(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT:     br i1 %cond
+
+loop_begin:
+  br i1 %cond, label %loop_a, label %loop_b
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+  call i32 @a() convergent
+  br label %loop_latch
+
+loop_b:
+  call i32 @b()
+  br label %loop_latch
+
+loop_latch:
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret void
+}
+
+; Negative test: we cannot unswitch noduplicate calls.
+define void @test_no_unswitch_noduplicate(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch_noduplicate(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT:     br i1 %cond
+
+loop_begin:
+  br i1 %cond, label %loop_a, label %loop_b
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+  call i32 @a() noduplicate
+  br label %loop_latch
+
+loop_b:
+  call i32 @b()
+  br label %loop_latch
+
+loop_latch:
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret void
+}
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Negative test: we cannot unswitch when tokens are used across blocks as we
+; might introduce PHIs.
+define void @test_no_unswitch_cross_block_token(i1* %ptr, i1 %cond) nounwind personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test_no_unswitch_cross_block_token(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+;
+; We shouldn't have unswitched into any other block either.
+; CHECK-NOT:     br i1 %cond
+
+loop_begin:
+  br i1 %cond, label %loop_a, label %loop_b
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br i1 %cond, label %loop_a, label %loop_b
+
+loop_a:
+  call i32 @a()
+  br label %loop_cont
+
+loop_b:
+  call i32 @b()
+  br label %loop_cont
+
+loop_cont:
+  invoke i32 @a()
+          to label %loop_latch unwind label %loop_catch
+
+loop_latch:
+  br label %loop_begin
+
+loop_catch:
+  %catch = catchswitch within none [label %loop_catch_latch, label %loop_exit] unwind to caller
+
+loop_catch_latch:
+  %catchpad_latch = catchpad within %catch []
+  catchret from %catchpad_latch to label %loop_begin
+
+loop_exit:
+  %catchpad_exit = catchpad within %catch []
+  catchret from %catchpad_exit to label %exit
+
+exit:
+  ret void
+}
+
+
+; Non-trivial loop unswitching where there are two distinct trivial conditions
+; to unswitch within the loop.
+define i32 @test1(i1* %ptr, i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @test1(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+  br i1 %cond1, label %loop_a, label %loop_b
+
+loop_a:
+  call i32 @a()
+  br label %latch
+; The 'loop_a' unswitched loop.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    call i32 @a()
+; CHECK-NEXT:    br label %latch.us
+;
+; CHECK:       latch.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  br i1 %cond2, label %loop_b_a, label %loop_b_b
+; The second unswitched condition.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br i1 %cond2, label %entry.split.split.us, label %entry.split.split
+
+loop_b_a:
+  call i32 @b()
+  br label %latch
+; The 'loop_b_a' unswitched loop.
+;
+; CHECK:       entry.split.split.us:
+; CHECK-NEXT:    br label %loop_begin.us1
+;
+; CHECK:       loop_begin.us1:
+; CHECK-NEXT:    br label %loop_b.us
+;
+; CHECK:       loop_b.us:
+; CHECK-NEXT:    br label %loop_b_a.us
+;
+; CHECK:       loop_b_a.us:
+; CHECK-NEXT:    call i32 @b()
+; CHECK-NEXT:    br label %latch.us2
+;
+; CHECK:       latch.us2:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us
+;
+; CHECK:       loop_exit.split.split.us:
+; CHECK-NEXT:    br label %loop_exit.split
+
+loop_b_b:
+  call i32 @c()
+  br label %latch
+; The 'loop_b_b' unswitched loop.
+;
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    br label %loop_b_b
+;
+; CHECK:       loop_b_b:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %latch
+;
+; CHECK:       latch:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split.split
+;
+; CHECK:       loop_exit.split.split:
+; CHECK-NEXT:    br label %loop_exit.split
+
+latch:
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret
+}
+
+define i32 @test2(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr, i32* %c.ptr) {
+; CHECK-LABEL: @test2(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %v = load i1, i1* %ptr
+  br i1 %cond1, label %loop_a, label %loop_b
+
+loop_a:
+  %a = load i32, i32* %a.ptr
+  %ac = load i32, i32* %c.ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+; The 'loop_a' unswitched loop.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[AC:.*]] = load i32, i32* %c.ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT:    %[[AC_LCSSA:.*]] = phi i32 [ %[[AC]], %loop_a.us ]
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  %b = load i32, i32* %b.ptr
+  %bc = load i32, i32* %c.ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    %[[BC:.*]] = load i32, i32* %c.ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ]
+; CHECK-NEXT:    %[[BC_LCSSA:.*]] = phi i32 [ %[[BC]], %loop_b ]
+; CHECK-NEXT:    br label %loop_exit
+
+loop_exit:
+  %ab.phi = phi i32 [ %a, %loop_a ], [ %b, %loop_b ]
+  %c.phi = phi i32 [ %ac, %loop_a ], [ %bc, %loop_b ]
+  %result = add i32 %ab.phi, %c.phi
+  ret i32 %result
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT:    %[[C_PHI:.*]] = phi i32 [ %[[BC_LCSSA]], %loop_exit.split ], [ %[[AC_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT:    %[[RESULT:.*]] = add i32 %[[AB_PHI]], %[[C_PHI]]
+; CHECK-NEXT:    ret i32 %[[RESULT]]
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with other
+; in-loop predecessors.
+define i32 @test3a(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test3a(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %v = load i1, i1* %ptr
+  %a = load i32, i32* %a.ptr
+  br i1 %cond1, label %loop_exit, label %loop_b
+; The 'loop_exit' clone.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ]
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  %b = load i32, i32* %b.ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ]
+; CHECK-NEXT:    br label %loop_exit
+
+loop_exit:
+  %ab.phi = phi i32 [ %a, %loop_begin ], [ %b, %loop_b ]
+  ret i32 %ab.phi
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT:    ret i32 %[[AB_PHI]]
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with other
+; in-loop predecessors. This is the same as @test3a but with the reversed order
+; of successors so that the exiting edge is *not* the cloned edge.
+define i32 @test3b(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test3b(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %v = load i1, i1* %ptr
+  %a = load i32, i32* %a.ptr
+  br i1 %cond1, label %loop_b, label %loop_exit
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_b.us
+;
+; CHECK:       loop_b.us:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b.us ]
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  %b = load i32, i32* %b.ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+; The original loop, now non-looping due to unswitching.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_exit:
+  %ab.phi = phi i32 [ %b, %loop_b ], [ %a, %loop_begin ]
+  ret i32 %ab.phi
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[AB_PHI:.*]] = phi i32 [ %[[A]], %loop_exit.split ], [ %[[B_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT:    ret i32 %[[AB_PHI]]
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with no other
+; in-loop predecessors.
+define void @test4a(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test4a(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %v = load i1, i1* %ptr
+  %a = load i32, i32* %a.ptr
+  br i1 %cond1, label %loop_exit1, label %loop_b
+; The 'loop_exit1' clone.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_exit1.split.us
+;
+; CHECK:       loop_exit1.split.us:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ]
+; CHECK-NEXT:    br label %loop_exit1
+
+loop_b:
+  %b = load i32, i32* %b.ptr
+  br i1 %v, label %loop_begin, label %loop_exit2
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit2
+
+loop_exit1:
+  %a.phi = phi i32 [ %a, %loop_begin ]
+  call void @sink1(i32 %a.phi)
+  ret void
+; CHECK:       loop_exit1:
+; CHECK-NEXT:    call void @sink1(i32 %[[A_LCSSA]])
+; CHECK-NEXT:    ret void
+
+loop_exit2:
+  %b.phi = phi i32 [ %b, %loop_b ]
+  call void @sink2(i32 %b.phi)
+  ret void
+; CHECK:       loop_exit2:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ]
+; CHECK-NEXT:    call void @sink2(i32 %[[B_LCSSA]])
+; CHECK-NEXT:    ret void
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with no other
+; in-loop predecessors. This is the same as @test4a but with the edges reversed
+; so that the exiting edge is *not* the cloned edge.
+define void @test4b(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test4b(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %v = load i1, i1* %ptr
+  %a = load i32, i32* %a.ptr
+  br i1 %cond1, label %loop_b, label %loop_exit1
+; The 'loop_b' clone.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_b.us
+;
+; CHECK:       loop_b.us:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit2.split.us
+;
+; CHECK:       loop_exit2.split.us:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b.us ]
+; CHECK-NEXT:    br label %loop_exit2
+
+loop_b:
+  %b = load i32, i32* %b.ptr
+  br i1 %v, label %loop_begin, label %loop_exit2
+; The 'loop_exit1' unswitched path.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_exit1
+
+loop_exit1:
+  %a.phi = phi i32 [ %a, %loop_begin ]
+  call void @sink1(i32 %a.phi)
+  ret void
+; CHECK:       loop_exit1:
+; CHECK-NEXT:    %[[A_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ]
+; CHECK-NEXT:    call void @sink1(i32 %[[A_PHI]])
+; CHECK-NEXT:    ret void
+
+loop_exit2:
+  %b.phi = phi i32 [ %b, %loop_b ]
+  call void @sink2(i32 %b.phi)
+  ret void
+; CHECK:       loop_exit2:
+; CHECK-NEXT:    call void @sink2(i32 %[[B_LCSSA]])
+; CHECK-NEXT:    ret void
+}
+
+; Test a non-trivial unswitch of an exiting edge to an exit block with no other
+; in-loop predecessors. This is the same as @test4a but with a common merge
+; block after the independent loop exits. This requires a different structural
+; update to the dominator tree.
+define void @test4c(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test4c(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %v = load i1, i1* %ptr
+  %a = load i32, i32* %a.ptr
+  br i1 %cond1, label %loop_exit1, label %loop_b
+; The 'loop_exit1' clone.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_exit1.split.us
+;
+; CHECK:       loop_exit1.split.us:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_begin.us ]
+; CHECK-NEXT:    br label %loop_exit1
+
+loop_b:
+  %b = load i32, i32* %b.ptr
+  br i1 %v, label %loop_begin, label %loop_exit2
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit2
+
+loop_exit1:
+  %a.phi = phi i32 [ %a, %loop_begin ]
+  call void @sink1(i32 %a.phi)
+  br label %exit
+; CHECK:       loop_exit1:
+; CHECK-NEXT:    call void @sink1(i32 %[[A_LCSSA]])
+; CHECK-NEXT:    br label %exit
+
+loop_exit2:
+  %b.phi = phi i32 [ %b, %loop_b ]
+  call void @sink2(i32 %b.phi)
+  br label %exit
+; CHECK:       loop_exit2:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b ]
+; CHECK-NEXT:    call void @sink2(i32 %[[B_LCSSA]])
+; CHECK-NEXT:    br label %exit
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Test that we can unswitch a condition out of multiple layers of a loop nest.
+;
+; The branch on the argument %cond1 lives in the inner loop header and its
+; taken edge leaves both loops at once (it targets %loop_exit, which returns).
+; The expectations below show the branch on %cond1 ending up in the entry
+; block, ahead of the outer loop.
+define i32 @test5(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test5(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %loop_begin.split.us, label %entry.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %loop_begin.split
+
+loop_begin:
+  br label %inner_loop_begin
+
+inner_loop_begin:
+  %v = load i1, i1* %ptr
+  %a = load i32, i32* %a.ptr
+  br i1 %cond1, label %loop_exit, label %inner_loop_b
+; The 'loop_exit' clone.
+;
+; CHECK:       loop_begin.split.us:
+; CHECK-NEXT:    br label %inner_loop_begin.us
+;
+; CHECK:       inner_loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_exit.loopexit.split.us
+;
+; CHECK:       loop_exit.loopexit.split.us:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ]
+; CHECK-NEXT:    br label %loop_exit
+
+inner_loop_b:
+  %b = load i32, i32* %b.ptr
+  br i1 %v, label %inner_loop_begin, label %loop_latch
+; The 'inner_loop_b' unswitched loop.
+;
+; CHECK:       loop_begin.split:
+; CHECK-NEXT:    br label %inner_loop_begin
+;
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_b
+;
+; CHECK:       inner_loop_b:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_begin, label %loop_latch
+
+; Outer loop latch: %b.phi carries %b out of the inner loop, and the branch on
+; %v2 either starts another outer iteration or leaves the nest.
+loop_latch:
+  %b.phi = phi i32 [ %b, %inner_loop_b ]
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_begin, label %loop_exit
+; CHECK:       loop_latch:
+; CHECK-NEXT:    %[[B_INNER_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_b ]
+; CHECK-NEXT:    %[[V2:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V2]], label %loop_begin, label %loop_exit.loopexit1
+
+loop_exit:
+  %ab.phi = phi i32 [ %a, %inner_loop_begin ], [ %b.phi, %loop_latch ]
+  ret i32 %ab.phi
+; CHECK:       loop_exit.loopexit:
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit.loopexit1:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %loop_latch ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[B_LCSSA]], %loop_exit.loopexit1 ]
+; CHECK-NEXT:    ret i32 %[[AB_PHI]]
+}
+
+; Test that we can unswitch a condition where we end up only cloning some of
+; the nested loops and needing to delete some of the nested loops.
+;
+; The outer loop picks between two sibling inner loops (%loop_a_inner and
+; %loop_b_inner) based on the argument %cond1. The expected output keeps only
+; the 'loop_a' inner loop in the cloned (.us) outer loop and only the 'loop_b'
+; inner loop in the original outer loop.
+define i32 @test6(i1* %ptr, i1 %cond1, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test6(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %v = load i1, i1* %ptr
+  br i1 %cond1, label %loop_a, label %loop_b
+
+loop_a:
+  br label %loop_a_inner
+
+loop_a_inner:
+  %va = load i1, i1* %ptr
+  %a = load i32, i32* %a.ptr
+  br i1 %va, label %loop_a_inner, label %loop_a_inner_exit
+
+loop_a_inner_exit:
+  %a.lcssa = phi i32 [ %a, %loop_a_inner ]
+  br label %latch
+; The 'loop_a' cloned loop.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    br label %loop_a_inner.us
+;
+; CHECK:       loop_a_inner.us:
+; CHECK-NEXT:    %[[VA:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br i1 %[[VA]], label %loop_a_inner.us, label %loop_a_inner_exit.us
+;
+; CHECK:       loop_a_inner_exit.us:
+; CHECK-NEXT:    %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a_inner.us ]
+; CHECK-NEXT:    br label %latch.us
+;
+; CHECK:       latch.us:
+; CHECK-NEXT:    %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %loop_a_inner_exit.us ]
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_PHI]], %latch.us ]
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  br label %loop_b_inner
+
+loop_b_inner:
+  %vb = load i1, i1* %ptr
+  %b = load i32, i32* %b.ptr
+  br i1 %vb, label %loop_b_inner, label %loop_b_inner_exit
+
+loop_b_inner_exit:
+  %b.lcssa = phi i32 [ %b, %loop_b_inner ]
+  br label %latch
+
+latch:
+  %ab.phi = phi i32 [ %a.lcssa, %loop_a_inner_exit ], [ %b.lcssa, %loop_b_inner_exit ]
+  br i1 %v, label %loop_begin, label %loop_exit
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    br label %loop_b_inner
+;
+; CHECK:       loop_b_inner:
+; CHECK-NEXT:    %[[VB:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[VB]], label %loop_b_inner, label %loop_b_inner_exit
+;
+; CHECK:       loop_b_inner_exit:
+; CHECK-NEXT:    %[[B_INNER_LCSSA:.*]] = phi i32 [ %[[B]], %loop_b_inner ]
+; CHECK-NEXT:    br label %latch
+;
+; CHECK:       latch:
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B_INNER_LCSSA]], %latch ]
+; CHECK-NEXT:    br label %loop_exit
+
+loop_exit:
+  %ab.lcssa = phi i32 [ %ab.phi, %latch ]
+  ret i32 %ab.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[AB_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA]], %loop_exit.split.us ]
+; CHECK-NEXT:    ret i32 %[[AB_PHI]]
+}
+
+; Test that when unswitching a deeply nested loop condition in a way that
+; produces a non-loop clone that can reach multiple exit blocks which are part
+; of different outer loops we correctly divide the cloned loop blocks between
+; the outer loops based on reachability.
+;
+; %cond is loaded in the middle loop (%inner_loop_begin) and branched on in
+; %inner_inner_loop_d, where the true edge exits to %inner_loop_exit and the
+; false edge is the innermost loop's back edge.
+define i32 @test7a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test7a(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %a = load i32, i32* %a.ptr
+  br label %inner_loop_begin
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_begin
+
+inner_loop_begin:
+  %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ]
+  %cond = load i1, i1* %cond.ptr
+  %b = load i32, i32* %b.ptr
+  br label %inner_inner_loop_begin
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ]
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split
+
+inner_inner_loop_begin:
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b
+
+inner_inner_loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_exit, label %inner_inner_loop_c
+
+inner_inner_loop_b:
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c
+
+inner_inner_loop_c:
+  %v4 = load i1, i1* %ptr
+  br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d
+
+inner_inner_loop_d:
+  br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_begin
+; The cloned copy that always exits with the adjustments required to fix up
+; loop exits.
+;
+; CHECK:       inner_loop_begin.split.us:
+; CHECK-NEXT:    br label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_inner_loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us
+;
+; CHECK:       inner_inner_loop_b.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us.loopexit
+;
+; CHECK:       inner_inner_loop_a.us:
+; CHECK-NEXT:    %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin.us ]
+; CHECK-NEXT:    %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin.us ]
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us
+;
+; CHECK:       inner_inner_loop_c.us.loopexit:
+; CHECK-NEXT:    br label %inner_inner_loop_c.us
+;
+; CHECK:       inner_inner_loop_c.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us
+;
+; CHECK:       inner_inner_loop_d.us:
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit.split
+;
+; CHECK:       inner_inner_loop_exit.split.us:
+; CHECK-NEXT:    br label %inner_inner_loop_exit
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a.us ]
+; CHECK-NEXT:    %[[B_LCSSA_US:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a.us ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+;
+; The original copy that continues to loop.
+;
+; CHECK:       inner_loop_begin.split:
+; CHECK-NEXT:    br label %inner_inner_loop_begin
+;
+; CHECK:       inner_inner_loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b
+;
+; CHECK:       inner_inner_loop_a:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c
+;
+; CHECK:       inner_inner_loop_b:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c
+;
+; CHECK:       inner_inner_loop_c:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d
+;
+; CHECK:       inner_inner_loop_d:
+; CHECK-NEXT:    br label %inner_inner_loop_begin
+;
+; CHECK:       inner_inner_loop_exit.split:
+; CHECK-NEXT:    br label %inner_inner_loop_exit
+
+; Exit block of the innermost loop that is still inside the middle loop: it
+; reloads %a.ptr for the middle loop's phi and either leaves the middle loop
+; or starts its next iteration.
+inner_inner_loop_exit:
+  %a2 = load i32, i32* %a.ptr
+  %v5 = load i1, i1* %ptr
+  br i1 %v5, label %inner_loop_exit, label %inner_loop_begin
+; CHECK:       inner_inner_loop_exit:
+; CHECK-NEXT:    %[[A2]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin
+
+inner_loop_exit:
+  br label %loop_begin
+; CHECK:       inner_loop_exit.loopexit.split:
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+;
+; CHECK:       inner_loop_exit.loopexit:
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit.loopexit1:
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ]
+  %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ]
+  %result = add i32 %a.lcssa, %b.lcssa
+  ret i32 %result
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ]
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT:    %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT:    %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]]
+; CHECK-NEXT:    ret i32 %[[RESULT]]
+}
+
+; Same pattern as @test7a but here the original loop becomes a non-loop that
+; can reach multiple exit blocks which are part of different outer loops.
+;
+; Relative to @test7a, the successors of the branch on %cond in
+; %inner_inner_loop_d are swapped: the true edge is now the innermost loop's
+; back edge and the false edge exits to %inner_loop_exit.
+define i32 @test7b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test7b(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %a = load i32, i32* %a.ptr
+  br label %inner_loop_begin
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_begin
+
+inner_loop_begin:
+  %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ]
+  %cond = load i1, i1* %cond.ptr
+  %b = load i32, i32* %b.ptr
+  br label %inner_inner_loop_begin
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ]
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split
+
+inner_inner_loop_begin:
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b
+
+inner_inner_loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_exit, label %inner_inner_loop_c
+
+inner_inner_loop_b:
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %inner_inner_loop_exit, label %inner_inner_loop_c
+
+inner_inner_loop_c:
+  %v4 = load i1, i1* %ptr
+  br i1 %v4, label %inner_loop_exit, label %inner_inner_loop_d
+
+inner_inner_loop_d:
+  br i1 %cond, label %inner_inner_loop_begin, label %inner_loop_exit
+; The cloned copy that continues looping.
+;
+; CHECK:       inner_loop_begin.split.us:
+; CHECK-NEXT:    br label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_inner_loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us
+;
+; CHECK:       inner_inner_loop_b.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_c.us
+;
+; CHECK:       inner_inner_loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split.us, label %inner_inner_loop_c.us
+;
+; CHECK:       inner_inner_loop_c.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit.loopexit.split.us, label %inner_inner_loop_d.us
+;
+; CHECK:       inner_inner_loop_d.us:
+; CHECK-NEXT:    br label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_inner_loop_exit.split.us:
+; CHECK-NEXT:    br label %inner_inner_loop_exit
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ]
+; CHECK-NEXT:    %[[B_LCSSA_US:.*]] = phi i32 [ %[[B]], %inner_inner_loop_a.us ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+;
+; The original copy that now always exits and needs adjustments for exit
+; blocks.
+;
+; CHECK:       inner_loop_begin.split:
+; CHECK-NEXT:    br label %inner_inner_loop_begin
+;
+; CHECK:       inner_inner_loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b
+;
+; CHECK:       inner_inner_loop_a:
+; CHECK-NEXT:    %[[A_NEW_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_begin ]
+; CHECK-NEXT:    %[[B_NEW_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_begin ]
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split, label %inner_inner_loop_c
+;
+; CHECK:       inner_inner_loop_b:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_exit.split, label %inner_inner_loop_c.loopexit
+;
+; CHECK:       inner_inner_loop_c.loopexit:
+; CHECK-NEXT:    br label %inner_inner_loop_c
+;
+; CHECK:       inner_inner_loop_c:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit.loopexit.split, label %inner_inner_loop_d
+;
+; CHECK:       inner_inner_loop_d:
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit.split
+;
+; CHECK:       inner_inner_loop_exit.split:
+; CHECK-NEXT:    br label %inner_inner_loop_exit
+
+; Exit block of the innermost loop that is still inside the middle loop: it
+; reloads %a.ptr for the middle loop's phi and either leaves the middle loop
+; or starts its next iteration.
+inner_inner_loop_exit:
+  %a2 = load i32, i32* %a.ptr
+  %v5 = load i1, i1* %ptr
+  br i1 %v5, label %inner_loop_exit, label %inner_loop_begin
+; CHECK:       inner_inner_loop_exit:
+; CHECK-NEXT:    %[[A2]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin
+
+inner_loop_exit:
+  br label %loop_begin
+; CHECK:       inner_loop_exit.loopexit.split:
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+;
+; CHECK:       inner_loop_exit.loopexit:
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit.loopexit1:
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a.phi, %inner_inner_loop_a ]
+  %b.lcssa = phi i32 [ %b, %inner_inner_loop_a ]
+  %result = add i32 %a.lcssa, %b.lcssa
+  ret i32 %result
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_NEW_LCSSA]], %inner_inner_loop_a ]
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B_NEW_LCSSA]], %inner_inner_loop_a ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT:    %[[B_PHI:.*]] = phi i32 [ %[[B_LCSSA]], %loop_exit.split ], [ %[[B_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT:    %[[RESULT:.*]] = add i32 %[[A_PHI]], %[[B_PHI]]
+; CHECK-NEXT:    ret i32 %[[RESULT]]
+}
+
+; Test that when the exit block set of an inner loop changes to start at a less
+; high level of the loop nest we correctly hoist the loop up the nest.
+;
+; %cond is loaded in the middle loop (%inner_loop_begin) and branched on in
+; %inner_inner_loop_b: the true edge continues to the innermost latch and the
+; false edge exits to %inner_inner_loop_exit, which is still inside the middle
+; loop. The other innermost exit (from %inner_inner_loop_a) goes to
+; %inner_loop_exit, which is outside the middle loop.
+define i32 @test8a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test8a(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %a = load i32, i32* %a.ptr
+  br label %inner_loop_begin
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_begin
+
+inner_loop_begin:
+  %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ]
+  %cond = load i1, i1* %cond.ptr
+  %b = load i32, i32* %b.ptr
+  br label %inner_inner_loop_begin
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ]
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split
+
+inner_inner_loop_begin:
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b
+
+inner_inner_loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit
+
+inner_inner_loop_b:
+  br i1 %cond, label %inner_inner_loop_latch, label %inner_inner_loop_exit
+
+inner_inner_loop_latch:
+  br label %inner_inner_loop_begin
+; The cloned region is now an exit from the inner loop.
+;
+; CHECK:       inner_loop_begin.split.us:
+; CHECK-NEXT:    %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ]
+; CHECK-NEXT:    br label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_inner_loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us
+;
+; CHECK:       inner_inner_loop_b.us:
+; CHECK-NEXT:    br label %inner_inner_loop_latch.us
+;
+; CHECK:       inner_inner_loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us
+;
+; CHECK:       inner_inner_loop_latch.us:
+; CHECK-NEXT:    br label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT:    %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a.us ]
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+;
+; The original region exits the loop earlier.
+;
+; CHECK:       inner_loop_begin.split:
+; CHECK-NEXT:    br label %inner_inner_loop_begin
+;
+; CHECK:       inner_inner_loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b
+;
+; CHECK:       inner_inner_loop_a:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split
+;
+; CHECK:       inner_inner_loop_b:
+; CHECK-NEXT:    br label %inner_inner_loop_exit
+;
+; CHECK:       inner_inner_loop_latch:
+; CHECK-NEXT:    br label %inner_inner_loop_begin
+
+inner_inner_loop_exit:
+  %a2 = load i32, i32* %a.ptr
+  %v4 = load i1, i1* %ptr
+  br i1 %v4, label %inner_loop_exit, label %inner_loop_begin
+; CHECK:       inner_inner_loop_exit:
+; CHECK-NEXT:    %[[A2]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin
+
+inner_loop_exit:
+  %v5 = load i1, i1* %ptr
+  br i1 %v5, label %loop_exit, label %loop_begin
+; CHECK:       inner_loop_exit.loopexit.split:
+; CHECK-NEXT:    %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a ]
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+;
+; CHECK:       inner_loop_exit.loopexit:
+; CHECK-NEXT:    %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ]
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit.loopexit1:
+; CHECK-NEXT:    %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ]
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; Note that the pattern below rebinds the A_INNER_PHI variable to the merged
+; phi in %inner_loop_exit; the final loop_exit pattern refers to that binding.
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ]
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit, label %loop_begin
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ]
+; CHECK-NEXT:    ret i32 %[[A_LCSSA]]
+}
+
+; Same pattern as @test8a but where the original loop loses an exit block and
+; needs to be hoisted up the nest.
+;
+; Relative to @test8a, the successors of the branch on %cond in
+; %inner_inner_loop_b are swapped: the true edge now exits to
+; %inner_inner_loop_exit and the false edge continues to the innermost latch.
+define i32 @test8b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test8b(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %a = load i32, i32* %a.ptr
+  br label %inner_loop_begin
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_begin
+
+inner_loop_begin:
+  %a.phi = phi i32 [ %a, %loop_begin ], [ %a2, %inner_inner_loop_exit ]
+  %cond = load i1, i1* %cond.ptr
+  %b = load i32, i32* %b.ptr
+  br label %inner_inner_loop_begin
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    %[[A_INNER_PHI:.*]] = phi i32 [ %[[A]], %loop_begin ], [ %[[A2:.*]], %inner_inner_loop_exit ]
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %inner_loop_begin.split.us, label %inner_loop_begin.split
+
+inner_inner_loop_begin:
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %inner_inner_loop_a, label %inner_inner_loop_b
+
+inner_inner_loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %inner_inner_loop_latch, label %inner_loop_exit
+
+inner_inner_loop_b:
+  br i1 %cond, label %inner_inner_loop_exit, label %inner_inner_loop_latch
+
+inner_inner_loop_latch:
+  br label %inner_inner_loop_begin
+; The cloned region is similar to before but with one earlier exit.
+;
+; CHECK:       inner_loop_begin.split.us:
+; CHECK-NEXT:    br label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_inner_loop_begin.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_a.us, label %inner_inner_loop_b.us
+;
+; CHECK:       inner_inner_loop_b.us:
+; CHECK-NEXT:    br label %inner_inner_loop_exit.split.us
+;
+; CHECK:       inner_inner_loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_latch.us, label %inner_loop_exit.loopexit.split.us
+;
+; CHECK:       inner_inner_loop_latch.us:
+; CHECK-NEXT:    br label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_inner_loop_exit.split.us:
+; CHECK-NEXT:    br label %inner_inner_loop_exit
+;
+; CHECK:       inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT:    %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_a.us ]
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+;
+; The original region is now an exit in the preheader.
+;
+; CHECK:       inner_loop_begin.split:
+; CHECK-NEXT:    %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_begin ]
+; CHECK-NEXT:    br label %inner_inner_loop_begin
+;
+; CHECK:       inner_inner_loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_a, label %inner_inner_loop_b
+;
+; CHECK:       inner_inner_loop_a:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_latch, label %inner_loop_exit.loopexit.split
+;
+; CHECK:       inner_inner_loop_b:
+; CHECK-NEXT:    br label %inner_inner_loop_latch
+;
+; CHECK:       inner_inner_loop_latch:
+; CHECK-NEXT:    br label %inner_inner_loop_begin
+
+inner_inner_loop_exit:
+  %a2 = load i32, i32* %a.ptr
+  %v4 = load i1, i1* %ptr
+  br i1 %v4, label %inner_loop_exit, label %inner_loop_begin
+; CHECK:       inner_inner_loop_exit:
+; CHECK-NEXT:    %[[A2]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit.loopexit1, label %inner_loop_begin
+
+inner_loop_exit:
+  %v5 = load i1, i1* %ptr
+  br i1 %v5, label %loop_exit, label %loop_begin
+; CHECK:       inner_loop_exit.loopexit.split:
+; CHECK-NEXT:    %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_a ]
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+;
+; CHECK:       inner_loop_exit.loopexit:
+; CHECK-NEXT:    %[[A_INNER_US_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit.split ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit.split.us ]
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit.loopexit1:
+; CHECK-NEXT:    %[[A_INNER_LCSSA2:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_inner_loop_exit ]
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; Note that the pattern below rebinds the A_INNER_PHI variable to the merged
+; phi in %inner_loop_exit; the final loop_exit pattern refers to that binding.
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA2]], %inner_loop_exit.loopexit1 ], [ %[[A_INNER_US_PHI]], %inner_loop_exit.loopexit ]
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit, label %loop_begin
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a.phi, %inner_loop_exit ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ]
+; CHECK-NEXT:    ret i32 %[[A_LCSSA]]
+}
+
+; Test for when unswitching produces a clone of an inner loop but
+; the clone no longer has an exiting edge *at all* and loops infinitely.
+; Because it doesn't ever exit to the outer loop it is no longer an inner loop
+; but needs to be hoisted up the nest to be a top-level loop.
+;
+; %cond is loaded in the outer loop header and is the inner loop's only exit
+; condition: the true edge stays in the inner loop (via %inner_loop_latch) and
+; the false edge exits to %inner_loop_exit.
+define i32 @test9a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test9a(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %b = load i32, i32* %b.ptr
+  %cond = load i1, i1* %cond.ptr
+  br label %inner_loop_begin
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %loop_begin.split.us, label %loop_begin.split
+
+inner_loop_begin:
+  %a = load i32, i32* %a.ptr
+  br i1 %cond, label %inner_loop_latch, label %inner_loop_exit
+
+inner_loop_latch:
+  call void @sink1(i32 %b)
+  br label %inner_loop_begin
+; The cloned inner loop ends up as an infinite loop and thus being a top-level
+; loop with the preheader as an exit block of the outer loop.
+;
+; CHECK:       loop_begin.split.us:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_begin ]
+; CHECK-NEXT:    br label %inner_loop_begin.us
+;
+; CHECK:       inner_loop_begin.us:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_latch.us
+;
+; CHECK:       inner_loop_latch.us:
+; CHECK-NEXT:    call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT:    br label %inner_loop_begin.us
+;
+; The original loop becomes boring non-loop code.
+;
+; CHECK:       loop_begin.split:
+; CHECK-NEXT:    br label %inner_loop_begin
+;
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_exit
+
+inner_loop_exit:
+  %a.inner_lcssa = phi i32 [ %a, %inner_loop_begin ]
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ]
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ]
+; CHECK-NEXT:    ret i32 %[[A_LCSSA]]
+}
+
+; The same core pattern as @test9a, but instead of the cloned loop becoming an
+; infinite loop, the original loop has its only exit unswitched and the
+; original loop becomes infinite and must be hoisted out of the loop nest.
+;
+; Relative to @test9a, the successors of the branch on %cond in
+; %inner_loop_begin are swapped: the true edge exits to %inner_loop_exit and
+; the false edge stays in the inner loop.
+define i32 @test9b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test9b(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %b = load i32, i32* %b.ptr
+  %cond = load i1, i1* %cond.ptr
+  br label %inner_loop_begin
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %loop_begin.split.us, label %loop_begin.split
+
+inner_loop_begin:
+  %a = load i32, i32* %a.ptr
+  br i1 %cond, label %inner_loop_exit, label %inner_loop_latch
+
+inner_loop_latch:
+  call void @sink1(i32 %b)
+  br label %inner_loop_begin
+; The cloned inner loop becomes a boring non-loop.
+;
+; CHECK:       loop_begin.split.us:
+; CHECK-NEXT:    br label %inner_loop_begin.us
+;
+; CHECK:       inner_loop_begin.us:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_exit.split.us
+;
+; CHECK:       inner_loop_exit.split.us:
+; CHECK-NEXT:    %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ]
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; The original loop becomes an infinite loop and thus a top-level loop with the
+; preheader as an exit block for the outer loop.
+;
+; CHECK:       loop_begin.split:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %loop_begin ]
+; CHECK-NEXT:    br label %inner_loop_begin
+;
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_latch
+;
+; CHECK:       inner_loop_latch:
+; CHECK-NEXT:    call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT:    br label %inner_loop_begin
+
+inner_loop_exit:
+  %a.inner_lcssa = phi i32 [ %a, %inner_loop_begin ]
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ]
+; CHECK-NEXT:    ret i32 %[[A_LCSSA]]
+}
+
+; Test that requires re-forming dedicated exits for the cloned loop.
+define i32 @test10a(i1* %ptr, i1 %cond, i32* %a.ptr) {
+; CHECK-LABEL: @test10a(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %a = load i32, i32* %a.ptr
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %loop_a, label %loop_b
+
+loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_exit, label %loop_begin
+
+loop_b:
+  br i1 %cond, label %loop_exit, label %loop_begin
+; The cloned loop with one edge as a direct exit.
+; The remaining exit from %loop_a.us goes via %loop_exit.split.us.loopexit.
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_a.us, label %loop_b.us
+;
+; CHECK:       loop_b.us:
+; CHECK-NEXT:    %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin.us ]
+; CHECK-NEXT:    br label %loop_exit.split.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_begin.backedge.us
+;
+; CHECK:       loop_begin.backedge.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_exit.split.us.loopexit:
+; CHECK-NEXT:    %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT:    br label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[A_PHI_US:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b.us ], [ %[[A_LCSSA_A]], %loop_exit.split.us.loopexit ]
+; CHECK-NEXT:    br label %loop_exit
+
+; The original loop without one 'loop_exit' edge.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_a, label %loop_b
+;
+; CHECK:       loop_a:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split, label %loop_begin.backedge
+;
+; CHECK:       loop_begin.backedge:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    br label %loop_begin.backedge
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ]
+; CHECK-NEXT:    br label %loop_exit
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[A_PHI_US]], %loop_exit.split.us ]
+; CHECK-NEXT:    ret i32 %[[A_PHI]]
+}
+
+; Test that requires re-forming dedicated exits for the original loop.
+define i32 @test10b(i1* %ptr, i1 %cond, i32* %a.ptr) {
+; CHECK-LABEL: @test10b(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %a = load i32, i32* %a.ptr
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %loop_a, label %loop_b
+
+loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_begin, label %loop_exit
+
+loop_b:
+  br i1 %cond, label %loop_begin, label %loop_exit
+; The cloned loop without one of the exits.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_a.us, label %loop_b.us
+;
+; CHECK:       loop_b.us:
+; CHECK-NEXT:    br label %loop_begin.backedge.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.backedge.us, label %loop_exit.split.us
+;
+; CHECK:       loop_begin.backedge.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT:    br label %loop_exit
+
+; The original loop with one edge as a direct exit.
+; The remaining exit from %loop_a goes via %loop_exit.split.loopexit.
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_a, label %loop_b
+;
+; CHECK:       loop_a:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.backedge, label %loop_exit.split.loopexit
+;
+; CHECK:       loop_begin.backedge:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    %[[A_LCSSA_B:.*]] = phi i32 [ %[[A]], %loop_begin ]
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split.loopexit:
+; CHECK-NEXT:    %[[A_LCSSA_A:.*]] = phi i32 [ %[[A]], %loop_a ]
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[A_PHI_SPLIT:.*]] = phi i32 [ %[[A_LCSSA_B]], %loop_b ], [ %[[A_LCSSA_A]], %loop_exit.split.loopexit ]
+; CHECK-NEXT:    br label %loop_exit
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a, %loop_a ], [ %a, %loop_b ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_PHI:.*]] = phi i32 [ %[[A_PHI_SPLIT]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT:    ret i32 %[[A_PHI]]
+}
+
+; Check that if a cloned inner loop after unswitching doesn't loop and directly
+; exits even an outer loop, we don't add the cloned preheader to the outer
+; loop and do add the needed LCSSA phi nodes for the new exit block from the
+; outer loop.
+define i32 @test11a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test11a(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %b = load i32, i32* %b.ptr
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %loop_latch, label %inner_loop_ph
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_latch, label %inner_loop_ph
+
+inner_loop_ph:
+  %cond = load i1, i1* %cond.ptr
+  br label %inner_loop_begin
+; CHECK:       inner_loop_ph:
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %inner_loop_ph.split.us, label %inner_loop_ph.split
+
+inner_loop_begin:
+  call void @sink1(i32 %b)
+  %a = load i32, i32* %a.ptr
+  br i1 %cond, label %loop_exit, label %inner_loop_a
+
+inner_loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %inner_loop_exit, label %inner_loop_begin
+; The cloned path doesn't actually loop and is an exit from the outer loop as
+; well.
+; Its split preheader is outside the outer loop, hence the LCSSA phi for %b.
+; CHECK:       inner_loop_ph.split.us:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ]
+; CHECK-NEXT:    br label %inner_loop_begin.us
+;
+; CHECK:       inner_loop_begin.us:
+; CHECK-NEXT:    call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_exit.loopexit.split.us
+;
+; CHECK:       loop_exit.loopexit.split.us:
+; CHECK-NEXT:    %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_begin.us ]
+; CHECK-NEXT:    br label %loop_exit.loopexit
+;
+; The original remains a loop losing the exit edge.
+; It stays inside the outer loop, so it uses %b directly (no LCSSA phi).
+; CHECK:       inner_loop_ph.split:
+; CHECK-NEXT:    br label %inner_loop_begin
+;
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    call void @sink1(i32 %[[B]])
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_a
+;
+; CHECK:       inner_loop_a:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit, label %inner_loop_begin
+
+inner_loop_exit:
+  %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ]
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %loop_latch, label %loop_exit
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_a ]
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1
+
+loop_latch:
+  br label %loop_begin
+; CHECK:       loop_latch:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit.loopexit:
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit.loopexit1:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %loop_exit.loopexit ], [ %[[A_LCSSA]], %loop_exit.loopexit1 ]
+; CHECK-NEXT:    ret i32 %[[A_PHI]]
+}
+
+; Check that if the original inner loop after unswitching doesn't loop and
+; directly exits even an outer loop, we remove the original preheader from the
+; outer loop and add needed LCSSA phi nodes for the new exit block from the
+; outer loop.
+define i32 @test11b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test11b(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %b = load i32, i32* %b.ptr
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %loop_latch, label %inner_loop_ph
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_latch, label %inner_loop_ph
+
+inner_loop_ph:
+  %cond = load i1, i1* %cond.ptr
+  br label %inner_loop_begin
+; CHECK:       inner_loop_ph:
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %inner_loop_ph.split.us, label %inner_loop_ph.split
+
+inner_loop_begin:
+  call void @sink1(i32 %b)
+  %a = load i32, i32* %a.ptr
+  br i1 %cond, label %inner_loop_a, label %loop_exit
+
+inner_loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %inner_loop_exit, label %inner_loop_begin
+; The cloned path continues to loop without the exit out of the entire nest.
+; It stays inside the outer loop, so it uses %b directly (no LCSSA phi).
+; CHECK:       inner_loop_ph.split.us:
+; CHECK-NEXT:    br label %inner_loop_begin.us
+;
+; CHECK:       inner_loop_begin.us:
+; CHECK-NEXT:    call void @sink1(i32 %[[B]])
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_a.us
+;
+; CHECK:       inner_loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_exit.split.us, label %inner_loop_begin.us
+;
+; CHECK:       inner_loop_exit.split.us:
+; CHECK-NEXT:    %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_loop_a.us ]
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; The original path no longer loops and directly exits the entire nest.
+; Its preheader leaves the outer loop, hence the LCSSA phi for %b.
+; CHECK:       inner_loop_ph.split:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_loop_ph ]
+; CHECK-NEXT:    br label %inner_loop_begin
+;
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %loop_exit.loopexit
+
+inner_loop_exit:
+  %a.inner_lcssa = phi i32 [ %a, %inner_loop_a ]
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %loop_latch, label %loop_exit
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_latch, label %loop_exit.loopexit1
+
+loop_latch:
+  br label %loop_begin
+; CHECK:       loop_latch:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a, %inner_loop_begin ], [ %a.inner_lcssa, %inner_loop_exit ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit.loopexit:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %inner_loop_begin ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit.loopexit1:
+; CHECK-NEXT:    %[[A_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_LCSSA_US]], %inner_loop_exit ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.loopexit ], [ %[[A_LCSSA_US]], %loop_exit.loopexit1 ]
+; CHECK-NEXT:    ret i32 %[[A_PHI]]
+}
+
+; Like test11a, but checking that when the whole thing is wrapped in yet
+; another loop, we correctly attribute the cloned preheader to that outermost
+; loop rather than only handling the case where the preheader is not in any loop
+; at all.
+define i32 @test12a(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test12a(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br label %inner_loop_begin
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %inner_loop_begin
+
+inner_loop_begin:
+  %b = load i32, i32* %b.ptr
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph
+
+inner_inner_loop_ph:
+  %cond = load i1, i1* %cond.ptr
+  br label %inner_inner_loop_begin
+; CHECK:       inner_inner_loop_ph:
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split
+
+inner_inner_loop_begin:
+  call void @sink1(i32 %b)
+  %a = load i32, i32* %a.ptr
+  br i1 %cond, label %inner_loop_exit, label %inner_inner_loop_a
+
+inner_inner_loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin
+; The cloned path doesn't actually loop and is an exit from the middle loop
+; (headed by %inner_loop_begin) as well; it lands in the outermost loop.
+; Its split preheader is outside the middle loop, hence the LCSSA phi for %b.
+; CHECK:       inner_inner_loop_ph.split.us:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ]
+; CHECK-NEXT:    br label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_inner_loop_begin.us:
+; CHECK-NEXT:    call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit.split.us
+;
+; CHECK:       inner_loop_exit.loopexit.split.us:
+; CHECK-NEXT:    %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin.us ]
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+;
+; The original remains a loop losing the exit edge.
+; It stays inside the middle loop, so it uses %b directly (no LCSSA phi).
+; CHECK:       inner_inner_loop_ph.split:
+; CHECK-NEXT:    br label %inner_inner_loop_begin
+;
+; CHECK:       inner_inner_loop_begin:
+; CHECK-NEXT:    call void @sink1(i32 %[[B]])
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_inner_loop_a
+;
+; CHECK:       inner_inner_loop_a:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_exit, label %inner_inner_loop_begin
+
+inner_inner_loop_exit:
+  %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ]
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %inner_loop_latch, label %inner_loop_exit
+; CHECK:       inner_inner_loop_exit:
+; CHECK-NEXT:    %[[A_INNER_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a ]
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1
+
+inner_loop_latch:
+  br label %inner_loop_begin
+; CHECK:       inner_loop_latch:
+; CHECK-NEXT:    br label %inner_loop_begin
+
+inner_loop_exit:
+  %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ]
+  %v4 = load i1, i1* %ptr
+  br i1 %v4, label %loop_begin, label %loop_exit
+; CHECK:       inner_loop_exit.loopexit:
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit.loopexit1:
+; CHECK-NEXT:    %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA]], %inner_inner_loop_exit ]
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit1 ]
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ]
+; CHECK-NEXT:    ret i32 %[[A_LCSSA]]
+}
+
+; Like test11b, but checking that when the whole thing is wrapped in yet
+; another loop, we correctly sink the preheader to the outermost loop rather
+; than only handling the case where the preheader is completely removed from
+; a loop.
+define i32 @test12b(i1* %ptr, i1* %cond.ptr, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test12b(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br label %inner_loop_begin
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %inner_loop_begin
+
+inner_loop_begin:
+  %b = load i32, i32* %b.ptr
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %inner_loop_latch, label %inner_inner_loop_ph
+; CHECK:       inner_loop_begin:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_latch, label %inner_inner_loop_ph
+
+inner_inner_loop_ph:
+  %cond = load i1, i1* %cond.ptr
+  br label %inner_inner_loop_begin
+; CHECK:       inner_inner_loop_ph:
+; CHECK-NEXT:    %[[COND:.*]] = load i1, i1* %cond.ptr
+; CHECK-NEXT:    br i1 %[[COND]], label %inner_inner_loop_ph.split.us, label %inner_inner_loop_ph.split
+
+inner_inner_loop_begin:
+  call void @sink1(i32 %b)
+  %a = load i32, i32* %a.ptr
+  br i1 %cond, label %inner_inner_loop_a, label %inner_loop_exit
+
+inner_inner_loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %inner_inner_loop_exit, label %inner_inner_loop_begin
+; The cloned path continues to loop without the exit to %inner_loop_exit.
+; It stays inside the middle loop, so it uses %b directly (no LCSSA phi).
+; CHECK:       inner_inner_loop_ph.split.us:
+; CHECK-NEXT:    br label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_inner_loop_begin.us:
+; CHECK-NEXT:    call void @sink1(i32 %[[B]])
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_inner_loop_a.us
+;
+; CHECK:       inner_inner_loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_inner_loop_exit.split.us, label %inner_inner_loop_begin.us
+;
+; CHECK:       inner_inner_loop_exit.split.us:
+; CHECK-NEXT:    %[[A_INNER_INNER_LCSSA_US:.*]] = phi i32 [ %[[A]], %inner_inner_loop_a.us ]
+; CHECK-NEXT:    br label %inner_inner_loop_exit
+;
+; The original path no longer loops and exits directly into the outermost loop.
+; Its preheader leaves the middle loop, hence the LCSSA phi for %b.
+; CHECK:       inner_inner_loop_ph.split:
+; CHECK-NEXT:    %[[B_LCSSA:.*]] = phi i32 [ %[[B]], %inner_inner_loop_ph ]
+; CHECK-NEXT:    br label %inner_inner_loop_begin
+;
+; CHECK:       inner_inner_loop_begin:
+; CHECK-NEXT:    call void @sink1(i32 %[[B_LCSSA]])
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    br label %inner_loop_exit.loopexit
+
+inner_inner_loop_exit:
+  %a.inner_inner_lcssa = phi i32 [ %a, %inner_inner_loop_a ]
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %inner_loop_latch, label %inner_loop_exit
+; CHECK:       inner_inner_loop_exit:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %inner_loop_latch, label %inner_loop_exit.loopexit1
+
+inner_loop_latch:
+  br label %inner_loop_begin
+; CHECK:       inner_loop_latch:
+; CHECK-NEXT:    br label %inner_loop_begin
+
+inner_loop_exit:
+  %a.inner_lcssa = phi i32 [ %a, %inner_inner_loop_begin ], [ %a.inner_inner_lcssa, %inner_inner_loop_exit ]
+  %v4 = load i1, i1* %ptr
+  br i1 %v4, label %loop_begin, label %loop_exit
+; CHECK:       inner_loop_exit.loopexit:
+; CHECK-NEXT:    %[[A_INNER_LCSSA:.*]] = phi i32 [ %[[A]], %inner_inner_loop_begin ]
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit.loopexit1:
+; CHECK-NEXT:    %[[A_INNER_LCSSA_US:.*]] = phi i32 [ %[[A_INNER_INNER_LCSSA_US]], %inner_inner_loop_exit ]
+; CHECK-NEXT:    br label %inner_loop_exit
+;
+; CHECK:       inner_loop_exit:
+; CHECK-NEXT:    %[[A_INNER_PHI:.*]] = phi i32 [ %[[A_INNER_LCSSA]], %inner_loop_exit.loopexit ], [ %[[A_INNER_LCSSA_US]], %inner_loop_exit.loopexit1 ]
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit
+
+loop_exit:
+  %a.lcssa = phi i32 [ %a.inner_lcssa, %inner_loop_exit ]
+  ret i32 %a.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A_INNER_PHI]], %inner_loop_exit ]
+; CHECK-NEXT:    ret i32 %[[A_LCSSA]]
+}
+
+; Test where the cloned loop has an inner loop that has to be traversed to form
+; the cloned loop, and where this inner loop has multiple blocks, and where the
+; exiting block that connects the inner loop to the cloned loop is not the header
+; block. This ensures that we correctly handle interesting corner cases of
+; traversing back to the header when establishing the cloned loop.
+define i32 @test13a(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test13a(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %a = load i32, i32* %a.ptr
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %loop_a, label %loop_b
+
+loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_exit, label %loop_latch
+
+loop_b:
+  %b = load i32, i32* %b.ptr
+  br i1 %cond, label %loop_b_inner_ph, label %loop_exit
+
+loop_b_inner_ph:
+  br label %loop_b_inner_header
+
+loop_b_inner_header:
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body
+
+loop_b_inner_body:
+  %v4 = load i1, i1* %ptr
+  br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit
+
+loop_b_inner_latch:
+  br label %loop_b_inner_header
+
+loop_b_inner_exit:
+  br label %loop_latch
+
+loop_latch:
+  br label %loop_begin
+; The cloned loop contains an inner loop within it.
+; The inner loop is left from %loop_b_inner_body.us, not from its header.
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_a.us, label %loop_b.us
+;
+; CHECK:       loop_b.us:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br label %loop_b_inner_ph.us
+;
+; CHECK:       loop_b_inner_ph.us:
+; CHECK-NEXT:    br label %loop_b_inner_header.us
+;
+; CHECK:       loop_b_inner_header.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_body.us
+;
+; CHECK:       loop_b_inner_body.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_b_inner_latch.us, label %loop_b_inner_exit.us
+;
+; CHECK:       loop_b_inner_exit.us:
+; CHECK-NEXT:    br label %loop_latch.us
+;
+; CHECK:       loop_b_inner_latch.us:
+; CHECK-NEXT:    br label %loop_b_inner_header.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split.us, label %loop_latch.us
+;
+; CHECK:       loop_latch.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; And the original loop no longer contains an inner loop.
+; %loop_b now leaves unconditionally through %loop_exit.split.
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_a, label %loop_b
+;
+; CHECK:       loop_a:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split.loopexit, label %loop_latch
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_latch:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ]
+  ret i32 %lcssa
+; CHECK:       loop_exit.split.loopexit:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ]
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[AB_PHI:.*]] = phi i32 [ %[[B]], %loop_b ], [ %[[A_LCSSA]], %loop_exit.split.loopexit ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[AB_PHI_US:.*]] = phi i32 [ %[[AB_PHI]], %loop_exit.split ], [ %[[A_LCSSA_US]], %loop_exit.split.us ]
+; CHECK-NEXT:    ret i32 %[[AB_PHI_US]]
+}
+
+; Test where the original loop has an inner loop that has to be traversed to
+; rebuild the loop, and where this inner loop has multiple blocks, and where
+; the exiting block that connects the inner loop to the original loop is not
+; the header block. This ensures that we correctly handle interesting corner
+; cases of traversing back to the header when re-establishing that the
+; original loop still exists after unswitching.
+define i32 @test13b(i1* %ptr, i1 %cond, i32* %a.ptr, i32* %b.ptr) {
+; CHECK-LABEL: @test13b(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %a = load i32, i32* %a.ptr
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %loop_a, label %loop_b
+
+loop_a:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_exit, label %loop_latch
+
+loop_b:
+  %b = load i32, i32* %b.ptr
+  br i1 %cond, label %loop_exit, label %loop_b_inner_ph
+
+loop_b_inner_ph:
+  br label %loop_b_inner_header
+
+loop_b_inner_header:
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %loop_b_inner_latch, label %loop_b_inner_body
+
+loop_b_inner_body:
+  %v4 = load i1, i1* %ptr
+  br i1 %v4, label %loop_b_inner_latch, label %loop_b_inner_exit
+
+loop_b_inner_latch:
+  br label %loop_b_inner_header
+
+loop_b_inner_exit:
+  br label %loop_latch
+
+loop_latch:
+  br label %loop_begin
+; The cloned loop doesn't contain an inner loop.
+; %loop_b.us leaves unconditionally through %loop_exit.split.us.
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_a.us, label %loop_b.us
+;
+; CHECK:       loop_b.us:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br label %loop_exit.split.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split.us.loopexit, label %loop_latch.us
+;
+; CHECK:       loop_latch.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_exit.split.us.loopexit:
+; CHECK-NEXT:    %[[A_LCSSA_US:.*]] = phi i32 [ %[[A]], %loop_a.us ]
+; CHECK-NEXT:    br label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    %[[AB_PHI_US:.*]] = phi i32 [ %[[B]], %loop_b.us ], [ %[[A_LCSSA_US]], %loop_exit.split.us.loopexit ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; But the original loop contains an inner loop that must be traversed.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[A:.*]] = load i32, i32* %a.ptr
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_a, label %loop_b
+;
+; CHECK:       loop_a:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_exit.split, label %loop_latch
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    %[[B:.*]] = load i32, i32* %b.ptr
+; CHECK-NEXT:    br label %loop_b_inner_ph
+;
+; CHECK:       loop_b_inner_ph:
+; CHECK-NEXT:    br label %loop_b_inner_header
+;
+; CHECK:       loop_b_inner_header:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_body
+;
+; CHECK:       loop_b_inner_body:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_b_inner_latch, label %loop_b_inner_exit
+;
+; CHECK:       loop_b_inner_latch:
+; CHECK-NEXT:    br label %loop_b_inner_header
+;
+; CHECK:       loop_b_inner_exit:
+; CHECK-NEXT:    br label %loop_latch
+;
+; CHECK:       loop_latch:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %lcssa = phi i32 [ %a, %loop_a ], [ %b, %loop_b ]
+  ret i32 %lcssa
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[A_LCSSA:.*]] = phi i32 [ %[[A]], %loop_a ]
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[AB_PHI:.*]] = phi i32 [ %[[A_LCSSA]], %loop_exit.split ], [ %[[AB_PHI_US]], %loop_exit.split.us ]
+; CHECK-NEXT:    ret i32 %[[AB_PHI]]
+}
+
+define i32 @test20(i32* %var, i32 %cond1, i32 %cond2) {
+; CHECK-LABEL: @test20(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond2, label %[[ENTRY_SPLIT_EXIT:.*]] [
+; CHECK-NEXT:      i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT:      i32 1, label %[[ENTRY_SPLIT_A]]
+; CHECK-NEXT:      i32 13, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT:      i32 2, label %[[ENTRY_SPLIT_A]]
+; CHECK-NEXT:      i32 42, label %[[ENTRY_SPLIT_C:.*]]
+; CHECK-NEXT:    ]
+
+loop_begin:
+  %var_val = load i32, i32* %var
+  switch i32 %cond2, label %loop_exit [
+    i32 0, label %loop_a
+    i32 1, label %loop_a
+    i32 13, label %loop_b
+    i32 2, label %loop_a
+    i32 42, label %loop_c
+  ]
+
+loop_a:
+  call i32 @a()
+  br label %loop_latch
+; Unswitched 'a' loop. Cases 0, 1 and 2 all target %loop_a in the input, so
+; the checks expect them to share this single copy.
+; CHECK:       [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT:    br label %[[LOOP_BEGIN_A:.*]]
+;
+; CHECK:       [[LOOP_BEGIN_A]]:
+; CHECK-NEXT:    %{{.*}} = load i32, i32* %var
+; CHECK-NEXT:    br label %[[LOOP_A:.*]]
+;
+; CHECK:       [[LOOP_A]]:
+; CHECK-NEXT:    call i32 @a()
+; CHECK-NEXT:    br label %[[LOOP_LATCH_A:.*]]
+;
+; CHECK:       [[LOOP_LATCH_A]]:
+; CHECK:         br label %[[LOOP_BEGIN_A]]
+
+loop_b:
+  call i32 @b()
+  br label %loop_latch
+; Unswitched 'b' loop (case 13).
+;
+; CHECK:       [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT:    br label %[[LOOP_BEGIN_B:.*]]
+;
+; CHECK:       [[LOOP_BEGIN_B]]:
+; CHECK-NEXT:    %{{.*}} = load i32, i32* %var
+; CHECK-NEXT:    br label %[[LOOP_B:.*]]
+;
+; CHECK:       [[LOOP_B]]:
+; CHECK-NEXT:    call i32 @b()
+; CHECK-NEXT:    br label %[[LOOP_LATCH_B:.*]]
+;
+; CHECK:       [[LOOP_LATCH_B]]:
+; CHECK:         br label %[[LOOP_BEGIN_B]]
+
+loop_c:
+  call i32 @c() noreturn nounwind
+  br label %loop_latch
+; Unswitched 'c' loop (case 42).
+;
+; CHECK:       [[ENTRY_SPLIT_C]]:
+; CHECK-NEXT:    br label %[[LOOP_BEGIN_C:.*]]
+;
+; CHECK:       [[LOOP_BEGIN_C]]:
+; CHECK-NEXT:    %{{.*}} = load i32, i32* %var
+; CHECK-NEXT:    br label %[[LOOP_C:.*]]
+;
+; CHECK:       [[LOOP_C]]:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %[[LOOP_LATCH_C:.*]]
+;
+; CHECK:       [[LOOP_LATCH_C]]:
+; CHECK:         br label %[[LOOP_BEGIN_C]]
+
+loop_latch:
+  br label %loop_begin
+
+loop_exit:
+  %lcssa = phi i32 [ %var_val, %loop_begin ]
+  ret i32 %lcssa
+; Unswitched exit edge (no longer a loop).
+; This is the default destination of the switch on %cond2.
+; CHECK:       [[ENTRY_SPLIT_EXIT]]:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V:.*]] = load i32, i32* %var
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[LCSSA:.*]] = phi i32 [ %[[V]], %loop_begin ]
+; CHECK-NEXT:    ret i32 %[[LCSSA]]
+}
+
+; Negative test: we do not unswitch when the loop contains unstructured control
+; flow, as it would significantly complicate the process (novel loops might be
+; formed, etc.).
+define void @test_no_unswitch_unstructured_cfg(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test_no_unswitch_unstructured_cfg(
+; Only the function label is matched; no expectations on the loop body are
+; stated for this function.
+entry:
+  br label %loop_begin
+
+loop_begin:
+  ; The only branch on the loop-invariant argument %cond.
+  br i1 %cond, label %loop_left, label %loop_right
+
+loop_left:
+  ; %loop_left and %loop_right branch to each other, and both are direct
+  ; successors of %loop_begin, so their cycle has two entry points.
+  %v1 = load i1, i1* %ptr
+  br i1 %v1, label %loop_right, label %loop_merge
+
+loop_right:
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_left, label %loop_merge
+
+loop_merge:
+  ; Both sides rejoin here; the loaded value decides between another iteration
+  ; and leaving the loop.
+  %v3 = load i1, i1* %ptr
+  br i1 %v3, label %loop_latch, label %loop_exit
+
+loop_latch:
+  br label %loop_begin
+
+loop_exit:
+  ret void
+}
+
+; A test reduced out of 403.gcc with interesting nested loops that trigger
+; multiple unswitches. A key component of this test is that there are multiple
+; paths to reach an inner loop after unswitching, and one of them is via the
+; predecessors of the unswitched loop header. That can allow us to find the loop
+; through multiple different paths.
+define void @test21(i1 %a, i1 %b) {
+; CHECK-LABEL: @test21(
+; The input branches on %a once (in %bb11.preheader) and on %b once (in %bb14).
+; The negative directives below bracket the positive ones so that no extra
+; branch on %a or %b may appear between the matched lines.
+bb:
+  br label %bb3
+; CHECK-NOT:     br i1 %a
+;
+; CHECK:         br i1 %a, label %[[BB_SPLIT_US:.*]], label %[[BB_SPLIT:.*]]
+;
+; CHECK-NOT:     br i1 %a
+; CHECK-NOT:     br i1 %b
+;
+; CHECK:       [[BB_SPLIT]]:
+; CHECK:         br i1 %b
+;
+; CHECK-NOT:     br i1 %a
+; CHECK-NOT:     br i1 %b
+
+bb3:
+  ; Entered from %bb and from %bb23; %bb23 always branches back here and the
+  ; function contains no return instruction.
+  %tmp1.0 = phi i32 [ 0, %bb ], [ %tmp1.3, %bb23 ]
+  br label %bb7
+
+bb7:
+  ; %tmp.0 is true when arriving from %bb3 and false when arriving from %bb19.
+  %tmp.0 = phi i1 [ true, %bb3 ], [ false, %bb19 ]
+  %tmp1.1 = phi i32 [ %tmp1.0, %bb3 ], [ %tmp1.2.lcssa, %bb19 ]
+  br i1 %tmp.0, label %bb11.preheader, label %bb23
+
+bb11.preheader:
+  ; Branch on the invariant argument %a: either skip straight to %bb19 or enter
+  ; the %bb14 self-loop.
+  br i1 %a, label %bb19, label %bb14.lr.ph
+
+bb14.lr.ph:
+  br label %bb14
+
+bb14:
+  ; Single-block loop that repeats until the invariant argument %b is true.
+  %tmp2.02 = phi i32 [ 0, %bb14.lr.ph ], [ 1, %bb14 ]
+  br i1 %b, label %bb11.bb19_crit_edge, label %bb14
+
+bb11.bb19_crit_edge:
+  ; Single-entry phi carrying %tmp2.02 out of the %bb14 loop.
+  %split = phi i32 [ %tmp2.02, %bb14 ]
+  br label %bb19
+
+bb19:
+  %tmp1.2.lcssa = phi i32 [ %split, %bb11.bb19_crit_edge ], [ %tmp1.1, %bb11.preheader ]
+  %tmp21 = icmp eq i32 %tmp1.2.lcssa, 0
+  br i1 %tmp21, label %bb23, label %bb7
+
+bb23:
+  %tmp1.3 = phi i32 [ %tmp1.2.lcssa, %bb19 ], [ %tmp1.1, %bb7 ]
+  br label %bb3
+}
+
+; A test reduced out of 400.perlbench that, when unswitching the `%stop`
+; condition, clones a loop nest outside of a containing loop. This exercises a
+; different cloning path from our other test cases, and in turn verifying the
+; resulting structure can catch any failures to correctly clone these nested
+; loops.
+; External functions called by this test and by @test23 below.
+declare void @f()
+declare void @g()
+declare i32 @h(i32 %arg)
+define void @test22(i32 %arg) {
+; CHECK-LABEL: define void @test22(
+entry:
+  br label %loop1.header
+
+loop1.header:
+  ; %stop is false on entry from %entry and true when arriving from
+  ; %loop1.latch. It is branched on both here and in %loop2.latch.
+  %stop = phi i1 [ true, %loop1.latch ], [ false, %entry ]
+  %i = phi i32 [ %i.lcssa, %loop1.latch ], [ %arg, %entry ]
+; CHECK:         %[[I:.*]] = phi i32 [ %{{.*}}, %loop1.latch ], [ %arg, %entry ]
+  br i1 %stop, label %loop1.exit, label %loop1.body.loop2.ph
+; CHECK:         br i1 %stop, label %loop1.exit, label %loop1.body.loop2.ph
+
+loop1.body.loop2.ph:
+  br label %loop2.header
+; Just check that we unswitched the key condition and that it leads to the
+; inner loop header.
+;
+; CHECK:       loop1.body.loop2.ph:
+; CHECK-NEXT:    br i1 %stop, label %[[SPLIT_US:.*]], label %[[SPLIT:.*]]
+;
+; CHECK:       [[SPLIT_US]]:
+; CHECK-NEXT:    br label %[[LOOP2_HEADER_US:.*]]
+;
+; CHECK:       [[LOOP2_HEADER_US]]:
+; CHECK-NEXT:    %{{.*}} = phi i32 [ %[[I]], %[[SPLIT_US]] ]
+;
+; CHECK:       [[SPLIT]]:
+; CHECK-NEXT:    br label %[[LOOP2_HEADER:.*]]
+;
+; CHECK:       [[LOOP2_HEADER]]:
+; CHECK-NEXT:    %{{.*}} = phi i32 [ %[[I]], %[[SPLIT]] ]
+
+loop2.header:
+  %i.inner = phi i32 [ %i, %loop1.body.loop2.ph ], [ %i.next, %loop2.latch ]
+  br label %loop3.header
+
+loop3.header:
+  ; The result of @h selects between: repeating this block (32), continuing to
+  ; %loop2.latch (59), going to %loop1.latch (36), or leaving through
+  ; %loop3.exit (default).
+  %sw = call i32 @h(i32 %i.inner)
+  switch i32 %sw, label %loop3.exit [
+    i32 32, label %loop3.header
+    i32 59, label %loop2.latch
+    i32 36, label %loop1.latch
+  ]
+
+loop2.latch:
+  ; Second branch on %stop, this one inside the loop headed by %loop2.header.
+  %i.next = add i32 %i.inner, 1
+  br i1 %stop, label %loop2.exit, label %loop2.header
+
+loop1.latch:
+  ; Single-entry phi carrying %i.inner back to %loop1.header.
+  %i.lcssa = phi i32 [ %i.inner, %loop3.header ]
+  br label %loop1.header
+
+loop3.exit:
+  call void @f()
+  ret void
+
+loop2.exit:
+  call void @g()
+  ret void
+
+loop1.exit:
+  call void @g()
+  ret void
+}
+
+; Test that when we are unswitching and need to rebuild the loop block set we
+; correctly skip past inner loops. We want to use the inner loop to efficiently
+; skip whole subregions of the outer loop blocks but just because the header of
+; the outer loop is also the preheader of an inner loop shouldn't confuse this
+; walk.
+define void @test23(i1 %arg, i1* %ptr) {
+; CHECK-LABEL: define void @test23(
+entry:
+  br label %outer.header
+; The branch on the invariant %arg (originally in %outer.body) is expected to
+; appear directly in the entry block.
+; CHECK:       entry:
+; CHECK-NEXT:    br i1 %arg,
+;
+; Just verify that we unswitched the correct bits. We should call `@f` twice in
+; one unswitch and `@f` and then `@g` in the other.
+; CHECK:         call void
+; CHECK-SAME:              @f
+; CHECK:         call void
+; CHECK-SAME:              @f
+;
+; CHECK:         call void
+; CHECK-SAME:              @f
+; CHECK:         call void
+; CHECK-SAME:              @g
+
+outer.header:
+  ; The outer loop header does nothing but branch to the inner loop header.
+  br label %inner.header
+
+inner.header:
+  call void @f()
+  br label %inner.latch
+
+inner.latch:
+  ; The inner loop repeats while the value loaded from %ptr is true.
+  %inner.cond = load i1, i1* %ptr
+  br i1 %inner.cond, label %inner.header, label %outer.body
+
+outer.body:
+  ; Branch on the invariant argument %arg, located after the inner loop.
+  br i1 %arg, label %outer.body.left, label %outer.body.right
+
+outer.body.left:
+  call void @f()
+  br label %outer.latch
+
+outer.body.right:
+  call void @g()
+  br label %outer.latch
+
+outer.latch:
+  %outer.cond = load i1, i1* %ptr
+  br i1 %outer.cond, label %outer.header, label %exit
+
+exit:
+  ret void
+}
+
+; Non-trivial loop unswitching where there are two invariant conditions, but the
+; second one is only in the cloned copy of the loop after unswitching.
+define i32 @test24(i1* %ptr, i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @test24(
+; The input loop branches on %cond1 in %loop_begin and, only on the true side
+; of that branch, on %cond2 in %loop_a. The expected output contains three
+; loops: a '.us.us' copy calling @a, a '.us' copy calling @c, and the original
+; blocks calling @b.
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split
+
+loop_begin:
+  br i1 %cond1, label %loop_a, label %loop_b
+
+loop_a:
+  br i1 %cond2, label %loop_a_a, label %loop_a_c
+; The second unswitched condition.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br i1 %cond2, label %entry.split.us.split.us, label %entry.split.us.split
+
+loop_a_a:
+  call i32 @a()
+  br label %latch
+; The 'loop_a_a' unswitched loop.
+;
+; CHECK:       entry.split.us.split.us:
+; CHECK-NEXT:    br label %loop_begin.us.us
+;
+; CHECK:       loop_begin.us.us:
+; CHECK-NEXT:    br label %loop_a.us.us
+;
+; CHECK:       loop_a.us.us:
+; CHECK-NEXT:    br label %loop_a_a.us.us
+;
+; CHECK:       loop_a_a.us.us:
+; CHECK-NEXT:    call i32 @a()
+; CHECK-NEXT:    br label %latch.us.us
+;
+; CHECK:       latch.us.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us.us, label %loop_exit.split.us.split.us
+;
+; CHECK:       loop_exit.split.us.split.us:
+; CHECK-NEXT:    br label %loop_exit.split
+
+loop_a_c:
+  call i32 @c()
+  br label %latch
+; The 'loop_a_c' unswitched loop.
+;
+; CHECK:       entry.split.us.split:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    br label %loop_a_c.us
+;
+; The branch target is spelled out in full: directives match by substring, so
+; a bare '%latch' would also accept a branch to the original loop's latch.
+; CHECK:       loop_a_c.us:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %latch.us
+;
+; CHECK:       latch.us:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us.split
+;
+; CHECK:       loop_exit.split.us.split:
+; CHECK-NEXT:    br label %loop_exit.split
+
+loop_b:
+  call i32 @b()
+  br label %latch
+; The 'loop_b' unswitched loop.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %loop_b
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    call i32 @b()
+; CHECK-NEXT:    br label %latch
+;
+; CHECK:       latch:
+; CHECK-NEXT:    %[[V:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+
+latch:
+  ; The loop repeats while the value loaded from %ptr is true.
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret
+}
+
+; Non-trivial partial loop unswitching of an invariant input to an 'or'.
+define i32 @test25(i1* %ptr, i1 %cond) {
+; CHECK-LABEL: @test25(
+; The loop branch condition is the 'or' of a value loaded inside the loop and
+; the invariant argument %cond. The expected output branches on %cond alone in
+; the entry block.
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split.us, label %entry.split
+
+loop_begin:
+  %v1 = load i1, i1* %ptr
+  %cond_or = or i1 %v1, %cond
+  br i1 %cond_or, label %loop_a, label %loop_b
+
+loop_a:
+  call i32 @a()
+  br label %latch
+; The 'loop_a' unswitched loop.
+;
+; In this copy the 'or' is expected to have 'true' in place of %cond, and the
+; branch to the 'a' block is expected to be unconditional.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V1_US:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[OR_US:.*]] = or i1 %[[V1_US]], true
+; CHECK-NEXT:    br label %loop_a.us
+;
+; CHECK:       loop_a.us:
+; CHECK-NEXT:    call i32 @a()
+; CHECK-NEXT:    br label %latch.us
+;
+; CHECK:       latch.us:
+; CHECK-NEXT:    %[[V2_US:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V2_US]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  call i32 @b()
+  br label %latch
+; The original loop.
+;
+; Here the 'or' is expected to have 'false' in place of %cond, and the
+; conditional branch on its result is expected to remain.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V1:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    %[[OR:.*]] = or i1 %[[V1]], false
+; CHECK-NEXT:    br i1 %[[OR]], label %loop_a, label %loop_b
+;
+; CHECK:       loop_a:
+; CHECK-NEXT:    call i32 @a()
+; CHECK-NEXT:    br label %latch
+;
+; CHECK:       loop_b:
+; CHECK-NEXT:    call i32 @b()
+; CHECK-NEXT:    br label %latch
+
+latch:
+  ; The loop repeats while the value loaded from %ptr is true.
+  %v2 = load i1, i1* %ptr
+  br i1 %v2, label %loop_begin, label %loop_exit
+; CHECK:       latch:
+; CHECK-NEXT:    %[[V2:.*]] = load i1, i1* %ptr
+; CHECK-NEXT:    br i1 %[[V2]], label %loop_begin, label %loop_exit.split
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret
+}
+
+; Non-trivial partial loop unswitching of multiple invariant inputs to an `and`
+; chain.
+define i32 @test26(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) {
+; CHECK-LABEL: @test26(
+; The loop branch condition is an 'and' chain in which %cond1 and %cond3 are
+; direct invariant operands, while %cond2 only feeds an intermediate 'or'.
+; The expected entry block combines %cond3 and %cond1 with a single 'and';
+; %cond2 is not part of that hoisted condition.
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[INV_AND:.*]] = and i1 %cond3, %cond1
+; CHECK-NEXT:    br i1 %[[INV_AND]], label %entry.split, label %entry.split.us
+
+loop_begin:
+  %v1 = load i1, i1* %ptr1
+  %v2 = load i1, i1* %ptr2
+  %cond_and1 = and i1 %v1, %cond1
+  %cond_or1 = or i1 %v2, %cond2
+  %cond_and2 = and i1 %cond_and1, %cond_or1
+  %cond_and3 = and i1 %cond_and2, %cond3
+  br i1 %cond_and3, label %loop_a, label %loop_b
+; The 'loop_b' unswitched loop.
+;
+; This copy is reached when the hoisted 'and' is false. Its condition chain is
+; expected to keep %cond1 and %cond3 as operands, and the branch to the 'b'
+; block is expected to be unconditional.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V1_US:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT:    %[[V2_US:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT:    %[[AND1_US:.*]] = and i1 %[[V1_US]], %cond1
+; CHECK-NEXT:    %[[OR1_US:.*]] = or i1 %[[V2_US]], %cond2
+; CHECK-NEXT:    %[[AND2_US:.*]] = and i1 %[[AND1_US]], %[[OR1_US]]
+; CHECK-NEXT:    %[[AND3_US:.*]] = and i1 %[[AND2_US]], %cond3
+; CHECK-NEXT:    br label %loop_b.us
+;
+; CHECK:       loop_b.us:
+; CHECK-NEXT:    call i32 @b()
+; CHECK-NEXT:    br label %latch.us
+;
+; CHECK:       latch.us:
+; CHECK-NEXT:    %[[V3_US:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT:    br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    br label %loop_exit
+
+; The original loop.
+;
+; Reached when the hoisted 'and' is true; %cond1 and %cond3 are expected to be
+; replaced by 'true' while %cond2 stays as an operand of the 'or'.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V1:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT:    %[[V2:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT:    %[[AND1:.*]] = and i1 %[[V1]], true
+; CHECK-NEXT:    %[[OR1:.*]] = or i1 %[[V2]], %cond2
+; CHECK-NEXT:    %[[AND2:.*]] = and i1 %[[AND1]], %[[OR1]]
+; CHECK-NEXT:    %[[AND3:.*]] = and i1 %[[AND2]], true
+; CHECK-NEXT:    br i1 %[[AND3]], label %loop_a, label %loop_b
+
+loop_a:
+  call i32 @a()
+  br label %latch
+; CHECK:       loop_a:
+; CHECK-NEXT:    call i32 @a()
+; CHECK-NEXT:    br label %latch
+
+loop_b:
+  call i32 @b()
+  br label %latch
+; CHECK:       loop_b:
+; CHECK-NEXT:    call i32 @b()
+; CHECK-NEXT:    br label %latch
+
+latch:
+  ; The loop repeats while the value loaded from %ptr3 is true.
+  %v3 = load i1, i1* %ptr3
+  br i1 %v3, label %loop_begin, label %loop_exit
+; CHECK:       latch:
+; CHECK-NEXT:    %[[V3:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT:    br i1 %[[V3]], label %loop_begin, label %loop_exit.split
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret
+}
+
+; Non-trivial partial loop unswitching of multiple invariant inputs to an `or`
+; chain. Basically an inverted version of corresponding `and` test (test26).
+define i32 @test27(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) {
+; CHECK-LABEL: @test27(
+; The loop branch condition is an 'or' chain in which %cond1 and %cond3 are
+; direct invariant operands, while %cond2 only feeds an intermediate 'and'.
+; The expected entry block combines %cond3 and %cond1 with a single 'or';
+; %cond2 is not part of that hoisted condition.
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[INV_OR:.*]] = or i1 %cond3, %cond1
+; CHECK-NEXT:    br i1 %[[INV_OR]], label %entry.split.us, label %entry.split
+
+loop_begin:
+  %v1 = load i1, i1* %ptr1
+  %v2 = load i1, i1* %ptr2
+  %cond_or1 = or i1 %v1, %cond1
+  %cond_and1 = and i1 %v2, %cond2
+  %cond_or2 = or i1 %cond_or1, %cond_and1
+  %cond_or3 = or i1 %cond_or2, %cond3
+  br i1 %cond_or3, label %loop_b, label %loop_a
+; The 'loop_b' unswitched loop.
+;
+; This copy is reached when the hoisted 'or' is true. Its condition chain is
+; expected to keep %cond1 and %cond3 as operands, and the branch to the 'b'
+; block is expected to be unconditional.
+;
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label %loop_begin.us
+;
+; CHECK:       loop_begin.us:
+; CHECK-NEXT:    %[[V1_US:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT:    %[[V2_US:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT:    %[[OR1_US:.*]] = or i1 %[[V1_US]], %cond1
+; CHECK-NEXT:    %[[AND1_US:.*]] = and i1 %[[V2_US]], %cond2
+; CHECK-NEXT:    %[[OR2_US:.*]] = or i1 %[[OR1_US]], %[[AND1_US]]
+; CHECK-NEXT:    %[[OR3_US:.*]] = or i1 %[[OR2_US]], %cond3
+; CHECK-NEXT:    br label %loop_b.us
+;
+; CHECK:       loop_b.us:
+; CHECK-NEXT:    call i32 @b()
+; CHECK-NEXT:    br label %latch.us
+;
+; CHECK:       latch.us:
+; CHECK-NEXT:    %[[V3_US:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT:    br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us
+;
+; CHECK:       loop_exit.split.us:
+; CHECK-NEXT:    br label %loop_exit
+
+; The original loop.
+;
+; Reached when the hoisted 'or' is false; %cond1 and %cond3 are expected to be
+; replaced by 'false' while %cond2 stays as an operand of the 'and'.
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+;
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[V1:.*]] = load i1, i1* %ptr1
+; CHECK-NEXT:    %[[V2:.*]] = load i1, i1* %ptr2
+; CHECK-NEXT:    %[[OR1:.*]] = or i1 %[[V1]], false
+; CHECK-NEXT:    %[[AND1:.*]] = and i1 %[[V2]], %cond2
+; CHECK-NEXT:    %[[OR2:.*]] = or i1 %[[OR1]], %[[AND1]]
+; CHECK-NEXT:    %[[OR3:.*]] = or i1 %[[OR2]], false
+; CHECK-NEXT:    br i1 %[[OR3]], label %loop_b, label %loop_a
+
+loop_a:
+  call i32 @a()
+  br label %latch
+; CHECK:       loop_a:
+; CHECK-NEXT:    call i32 @a()
+; CHECK-NEXT:    br label %latch
+
+loop_b:
+  call i32 @b()
+  br label %latch
+; CHECK:       loop_b:
+; CHECK-NEXT:    call i32 @b()
+; CHECK-NEXT:    br label %latch
+
+latch:
+  ; The loop repeats while the value loaded from %ptr3 is true.
+  %v3 = load i1, i1* %ptr3
+  br i1 %v3, label %loop_begin, label %loop_exit
+; CHECK:       latch:
+; CHECK-NEXT:    %[[V3:.*]] = load i1, i1* %ptr3
+; CHECK-NEXT:    br i1 %[[V3]], label %loop_begin, label %loop_exit.split
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    br label %loop_exit
+;
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret
+}
+
+; Non-trivial unswitching of a switch.
+define i32 @test28(i1* %ptr, i32 %cond) {
+; CHECK-LABEL: @test28(
+; The loop header switches on the invariant argument %cond with three cases
+; and a default that goes straight to the latch. The expected output has the
+; same switch in the entry block, selecting one of four separate loops.
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond, label %[[ENTRY_SPLIT_LATCH:.*]] [
+; CHECK-NEXT:      i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT:      i32 1, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT:      i32 2, label %[[ENTRY_SPLIT_C:.*]]
+; CHECK-NEXT:    ]
+
+loop_begin:
+  switch i32 %cond, label %latch [
+    i32 0, label %loop_a
+    i32 1, label %loop_b
+    i32 2, label %loop_c
+  ]
+
+loop_a:
+  call i32 @a()
+  br label %latch
+; Unswitched 'a' loop.
+;
+; CHECK:       [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT:    br label %[[LOOP_BEGIN_A:.*]]
+;
+; CHECK:       [[LOOP_BEGIN_A]]:
+; CHECK-NEXT:    br label %[[LOOP_A:.*]]
+;
+; CHECK:       [[LOOP_A]]:
+; CHECK-NEXT:    call i32 @a()
+; CHECK-NEXT:    br label %[[LOOP_LATCH_A:.*]]
+;
+; CHECK:       [[LOOP_LATCH_A]]:
+; CHECK-NEXT:    %[[V_A:.*]] = load i1, i1* %ptr
+; CHECK:         br i1 %[[V_A]], label %[[LOOP_BEGIN_A]], label %[[LOOP_EXIT_A:.*]]
+;
+; CHECK:       [[LOOP_EXIT_A]]:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_b:
+  call i32 @b()
+  br label %latch
+; Unswitched 'b' loop.
+;
+; CHECK:       [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT:    br label %[[LOOP_BEGIN_B:.*]]
+;
+; CHECK:       [[LOOP_BEGIN_B]]:
+; CHECK-NEXT:    br label %[[LOOP_B:.*]]
+;
+; CHECK:       [[LOOP_B]]:
+; CHECK-NEXT:    call i32 @b()
+; CHECK-NEXT:    br label %[[LOOP_LATCH_B:.*]]
+;
+; CHECK:       [[LOOP_LATCH_B]]:
+; CHECK-NEXT:    %[[V_B:.*]] = load i1, i1* %ptr
+; CHECK:         br i1 %[[V_B]], label %[[LOOP_BEGIN_B]], label %[[LOOP_EXIT_B:.*]]
+;
+; CHECK:       [[LOOP_EXIT_B]]:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_c:
+  call i32 @c()
+  br label %latch
+; Unswitched 'c' loop.
+;
+; CHECK:       [[ENTRY_SPLIT_C]]:
+; CHECK-NEXT:    br label %[[LOOP_BEGIN_C:.*]]
+;
+; CHECK:       [[LOOP_BEGIN_C]]:
+; CHECK-NEXT:    br label %[[LOOP_C:.*]]
+;
+; CHECK:       [[LOOP_C]]:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %[[LOOP_LATCH_C:.*]]
+;
+; CHECK:       [[LOOP_LATCH_C]]:
+; CHECK-NEXT:    %[[V_C:.*]] = load i1, i1* %ptr
+; CHECK:         br i1 %[[V_C]], label %[[LOOP_BEGIN_C]], label %[[LOOP_EXIT_C:.*]]
+;
+; CHECK:       [[LOOP_EXIT_C]]:
+; CHECK-NEXT:    br label %loop_exit
+
+latch:
+  ; The loop repeats while the value loaded from %ptr is true.
+  %v = load i1, i1* %ptr
+  br i1 %v, label %loop_begin, label %loop_exit
+; Unswitched the 'latch' only loop.
+;
+; This is the loop selected by the default destination of the entry switch;
+; its header is expected to branch directly to its latch.
+;
+; CHECK:       [[ENTRY_SPLIT_LATCH]]:
+; CHECK-NEXT:    br label %[[LOOP_BEGIN_LATCH:.*]]
+;
+; CHECK:       [[LOOP_BEGIN_LATCH]]:
+; CHECK-NEXT:    br label %[[LOOP_LATCH_LATCH:.*]]
+;
+; CHECK:       [[LOOP_LATCH_LATCH]]:
+; CHECK-NEXT:    %[[V_LATCH:.*]] = load i1, i1* %ptr
+; CHECK:         br i1 %[[V_LATCH]], label %[[LOOP_BEGIN_LATCH]], label %[[LOOP_EXIT_LATCH:.*]]
+;
+; CHECK:       [[LOOP_EXIT_LATCH]]:
+; CHECK-NEXT:    br label %loop_exit
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret i32 0
+}
+
+; A test case designed to exercise unusual properties of switches: they
+; can introduce multiple edges to successors. These need lots of special case
+; handling as they get collapsed in many cases (domtree, the unswitch itself)
+; but not in all cases (the PHI node operands).
+define i32 @test29(i32 %arg) {
+; CHECK-LABEL: @test29(
+; The switch in %dispatch has two edges to %body.a (cases 0 and 1) and two
+; edges to %body.c (default and case 3), so the phi nodes in those blocks list
+; %dispatch twice. The expected output keeps the duplicate edges on the entry
+; switch but lists the dispatch block only once in each per-destination phi.
+entry:
+  br label %header
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %arg, label %[[ENTRY_SPLIT_C:.*]] [
+; CHECK-NEXT:      i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT:      i32 1, label %[[ENTRY_SPLIT_A]]
+; CHECK-NEXT:      i32 2, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT:      i32 3, label %[[ENTRY_SPLIT_C]]
+; CHECK-NEXT:    ]
+
+header:
+  %tmp = call i32 @d()
+  %cmp1 = icmp eq i32 %tmp, 0
+  ; We set up a chain through all the successors of the switch that doesn't
+  ; involve the switch so that we can have interesting PHI nodes in them.
+  br i1 %cmp1, label %body.a, label %dispatch
+
+dispatch:
+  ; Switch with multiple successors. We arrange the last successor to be the
+  ; default to make the test case easier to read. This has a duplicate edge
+  ; both to the default destination (which is completely superfluous but
+  ; technically valid IR) and to a regular successor.
+  switch i32 %arg, label %body.c [
+    i32 0, label %body.a
+    i32 1, label %body.a
+    i32 2, label %body.b
+    i32 3, label %body.c
+  ]
+
+body.a:
+  ; One phi entry per incoming edge: %dispatch appears twice (cases 0 and 1).
+  %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ], [ %tmp, %dispatch ]
+  %tmp.a = call i32 @a()
+  %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a
+  br label %body.b
+; Unswitched 'a' loop.
+;
+; CHECK:       [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT:    br label %[[HEADER_A:.*]]
+;
+; CHECK:       [[HEADER_A]]:
+; CHECK-NEXT:    %[[TMP_A:.*]] = call i32 @d()
+; CHECK-NEXT:    %[[CMP1_A:.*]] = icmp eq i32 %[[TMP_A]], 0
+; CHECK-NEXT:    br i1 %[[CMP1_A]], label %[[BODY_A_A:.*]], label %[[DISPATCH_A:.*]]
+;
+; CHECK:       [[DISPATCH_A]]:
+; CHECK-NEXT:    br label %[[BODY_A_A]]
+;
+; CHECK:       [[BODY_A_A]]:
+; CHECK-NEXT:    %[[TMP_A_PHI_A:.*]] = phi i32 [ 0, %[[HEADER_A]] ], [ %[[TMP_A]], %[[DISPATCH_A]] ]
+; CHECK-NEXT:    %[[TMP_A_A:.*]] = call i32 @a()
+; CHECK-NEXT:    %[[TMP_A_SUM_A:.*]] = add i32 %[[TMP_A_PHI_A]], %[[TMP_A_A]]
+; CHECK-NEXT:    br label %[[BODY_B_A:.*]]
+;
+; CHECK:       [[BODY_B_A]]:
+; CHECK-NEXT:    %[[TMP_B_PHI_A:.*]] = phi i32 [ %[[TMP_A_SUM_A]], %[[BODY_A_A]] ]
+; CHECK-NEXT:    %[[TMP_B_A:.*]] = call i32 @b()
+; CHECK-NEXT:    %[[TMP_B_SUM_A:.*]] = add i32 %[[TMP_B_PHI_A]], %[[TMP_B_A]]
+; CHECK-NEXT:    br label %[[BODY_C_A:.*]]
+;
+; CHECK:       [[BODY_C_A]]:
+; CHECK-NEXT:    %[[TMP_C_PHI_A:.*]] = phi i32 [ %[[TMP_B_SUM_A]], %[[BODY_B_A]] ]
+; CHECK-NEXT:    %[[TMP_C_A:.*]] = call i32 @c()
+; CHECK-NEXT:    %[[TMP_C_SUM_A:.*]] = add i32 %[[TMP_C_PHI_A]], %[[TMP_C_A]]
+; CHECK-NEXT:    br label %[[LATCH_A:.*]]
+;
+; CHECK:       [[LATCH_A]]:
+; CHECK-NEXT:    %[[CMP2_A:.*]] = icmp slt i32 %[[TMP_C_SUM_A]], 42
+; CHECK:         br i1 %[[CMP2_A]], label %[[HEADER_A]], label %[[LOOP_EXIT_A:.*]]
+;
+; CHECK:       [[LOOP_EXIT_A]]:
+; CHECK-NEXT:    %[[LCSSA_A:.*]] = phi i32 [ %[[TMP_C_SUM_A]], %[[LATCH_A]] ]
+; CHECK-NEXT:    br label %exit
+
+body.b:
+  ; Reached from %dispatch by a single edge (case 2) and from %body.a.
+  %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ]
+  %tmp.b = call i32 @b()
+  %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b
+  br label %body.c
+; Unswitched 'b' loop.
+;
+; CHECK:       [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT:    br label %[[HEADER_B:.*]]
+;
+; CHECK:       [[HEADER_B]]:
+; CHECK-NEXT:    %[[TMP_B:.*]] = call i32 @d()
+; CHECK-NEXT:    %[[CMP1_B:.*]] = icmp eq i32 %[[TMP_B]], 0
+; CHECK-NEXT:    br i1 %[[CMP1_B]], label %[[BODY_A_B:.*]], label %[[DISPATCH_B:.*]]
+;
+; CHECK:       [[DISPATCH_B]]:
+; CHECK-NEXT:    br label %[[BODY_B_B:.*]]
+;
+; CHECK:       [[BODY_A_B]]:
+; CHECK-NEXT:    %[[TMP_A_PHI_B:.*]] = phi i32 [ 0, %[[HEADER_B]] ]
+; CHECK-NEXT:    %[[TMP_A_B:.*]] = call i32 @a()
+; CHECK-NEXT:    %[[TMP_A_SUM_B:.*]] = add i32 %[[TMP_A_PHI_B]], %[[TMP_A_B]]
+; CHECK-NEXT:    br label %[[BODY_B_B:.*]]
+;
+; CHECK:       [[BODY_B_B]]:
+; CHECK-NEXT:    %[[TMP_B_PHI_B:.*]] = phi i32 [ %[[TMP_B]], %[[DISPATCH_B]] ], [ %[[TMP_A_SUM_B]], %[[BODY_A_B]] ]
+; CHECK-NEXT:    %[[TMP_B_B:.*]] = call i32 @b()
+; CHECK-NEXT:    %[[TMP_B_SUM_B:.*]] = add i32 %[[TMP_B_PHI_B]], %[[TMP_B_B]]
+; CHECK-NEXT:    br label %[[BODY_C_B:.*]]
+;
+; CHECK:       [[BODY_C_B]]:
+; CHECK-NEXT:    %[[TMP_C_PHI_B:.*]] = phi i32 [ %[[TMP_B_SUM_B]], %[[BODY_B_B]] ]
+; CHECK-NEXT:    %[[TMP_C_B:.*]] = call i32 @c()
+; CHECK-NEXT:    %[[TMP_C_SUM_B:.*]] = add i32 %[[TMP_C_PHI_B]], %[[TMP_C_B]]
+; CHECK-NEXT:    br label %[[LATCH_B:.*]]
+;
+; CHECK:       [[LATCH_B]]:
+; CHECK-NEXT:    %[[CMP2_B:.*]] = icmp slt i32 %[[TMP_C_SUM_B]], 42
+; CHECK:         br i1 %[[CMP2_B]], label %[[HEADER_B]], label %[[LOOP_EXIT_B:.*]]
+;
+; CHECK:       [[LOOP_EXIT_B]]:
+; CHECK-NEXT:    %[[LCSSA_B:.*]] = phi i32 [ %[[TMP_C_SUM_B]], %[[LATCH_B]] ]
+; CHECK-NEXT:    br label %[[EXIT_SPLIT:.*]]
+
+body.c:
+  ; %dispatch appears twice: once for the default edge and once for case 3.
+  %tmp.c.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.b.sum, %body.b ]
+  %tmp.c = call i32 @c()
+  %tmp.c.sum = add i32 %tmp.c.phi, %tmp.c
+  br label %latch
+; Unswitched 'c' loop.
+;
+; CHECK:       [[ENTRY_SPLIT_C]]:
+; CHECK-NEXT:    br label %[[HEADER_C:.*]]
+;
+; CHECK:       [[HEADER_C]]:
+; CHECK-NEXT:    %[[TMP_C:.*]] = call i32 @d()
+; CHECK-NEXT:    %[[CMP1_C:.*]] = icmp eq i32 %[[TMP_C]], 0
+; CHECK-NEXT:    br i1 %[[CMP1_C]], label %[[BODY_A_C:.*]], label %[[DISPATCH_C:.*]]
+;
+; CHECK:       [[DISPATCH_C]]:
+; CHECK-NEXT:    br label %[[BODY_C_C:.*]]
+;
+; CHECK:       [[BODY_A_C]]:
+; CHECK-NEXT:    %[[TMP_A_PHI_C:.*]] = phi i32 [ 0, %[[HEADER_C]] ]
+; CHECK-NEXT:    %[[TMP_A_C:.*]] = call i32 @a()
+; CHECK-NEXT:    %[[TMP_A_SUM_C:.*]] = add i32 %[[TMP_A_PHI_C]], %[[TMP_A_C]]
+; CHECK-NEXT:    br label %[[BODY_B_C:.*]]
+;
+; CHECK:       [[BODY_B_C]]:
+; CHECK-NEXT:    %[[TMP_B_PHI_C:.*]] = phi i32 [ %[[TMP_A_SUM_C]], %[[BODY_A_C]] ]
+; CHECK-NEXT:    %[[TMP_B_C:.*]] = call i32 @b()
+; CHECK-NEXT:    %[[TMP_B_SUM_C:.*]] = add i32 %[[TMP_B_PHI_C]], %[[TMP_B_C]]
+; CHECK-NEXT:    br label %[[BODY_C_C:.*]]
+;
+; CHECK:       [[BODY_C_C]]:
+; CHECK-NEXT:    %[[TMP_C_PHI_C:.*]] = phi i32 [ %[[TMP_C]], %[[DISPATCH_C]] ], [ %[[TMP_B_SUM_C]], %[[BODY_B_C]] ]
+; CHECK-NEXT:    %[[TMP_C_C:.*]] = call i32 @c()
+; CHECK-NEXT:    %[[TMP_C_SUM_C:.*]] = add i32 %[[TMP_C_PHI_C]], %[[TMP_C_C]]
+; CHECK-NEXT:    br label %[[LATCH_C:.*]]
+;
+; CHECK:       [[LATCH_C]]:
+; CHECK-NEXT:    %[[CMP2_C:.*]] = icmp slt i32 %[[TMP_C_SUM_C]], 42
+; CHECK:         br i1 %[[CMP2_C]], label %[[HEADER_C]], label %[[LOOP_EXIT_C:.*]]
+;
+; CHECK:       [[LOOP_EXIT_C]]:
+; CHECK-NEXT:    %[[LCSSA_C:.*]] = phi i32 [ %[[TMP_C_SUM_C]], %[[LATCH_C]] ]
+; CHECK-NEXT:    br label %[[EXIT_SPLIT]]
+
+latch:
+  ; The loop repeats while the accumulated sum is below 42 (signed compare).
+  %cmp2 = icmp slt i32 %tmp.c.sum, 42
+  br i1 %cmp2, label %header, label %exit
+
+exit:
+  %lcssa.phi = phi i32 [ %tmp.c.sum, %latch ]
+  ret i32 %lcssa.phi
+; The 'b' and 'c' loop exits are expected to merge in an intermediate block
+; first; the 'a' loop exit joins them in the final exit block.
+; CHECK:       [[EXIT_SPLIT]]:
+; CHECK-NEXT:    %[[EXIT_PHI1:.*]] = phi i32 [ %[[LCSSA_C]], %[[LOOP_EXIT_C]] ], [ %[[LCSSA_B]], %[[LOOP_EXIT_B]] ]
+; CHECK-NEXT:    br label %exit
+
+; CHECK:       exit:
+; CHECK-NEXT:    %[[EXIT_PHI2:.*]] = phi i32 [ %[[EXIT_PHI1]], %[[EXIT_SPLIT]] ], [ %[[LCSSA_A]], %[[LOOP_EXIT_A]] ]
+; CHECK-NEXT:    ret i32 %[[EXIT_PHI2]]
+}
+
+; Similar to @test29 but designed to have one of the duplicate edges be
+; a loop exit edge as those can in some cases be special. Among other things,
+; this includes an LCSSA phi with multiple entries despite being a dedicated
+; exit block.
+define i32 @test30(i32 %arg) {
+; CHECK-LABEL: define i32 @test30(
+; The switch in %dispatch has two edges to the loop exit block %loop.exit1
+; (default and case -1) and two edges to %body.b (cases 1 and 2), so the phi
+; nodes in those blocks list %dispatch twice.
+entry:
+  br label %header
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %arg, label %[[ENTRY_SPLIT_EXIT:.*]] [
+; CHECK-NEXT:      i32 -1, label %[[ENTRY_SPLIT_EXIT]]
+; CHECK-NEXT:      i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT:      i32 1, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT:      i32 2, label %[[ENTRY_SPLIT_B]]
+; CHECK-NEXT:    ]
+
+header:
+  %tmp = call i32 @d()
+  %cmp1 = icmp eq i32 %tmp, 0
+  br i1 %cmp1, label %body.a, label %dispatch
+
+dispatch:
+  ; Both the default destination and case -1 leave the loop via %loop.exit1.
+  switch i32 %arg, label %loop.exit1 [
+    i32 -1, label %loop.exit1
+    i32 0, label %body.a
+    i32 1, label %body.b
+    i32 2, label %body.b
+  ]
+
+body.a:
+  %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ]
+  %tmp.a = call i32 @a()
+  %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a
+  br label %body.b
+; Unswitched 'a' loop.
+;
+; CHECK:       [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT:    br label %[[HEADER_A:.*]]
+;
+; CHECK:       [[HEADER_A]]:
+; CHECK-NEXT:    %[[TMP_A:.*]] = call i32 @d()
+; CHECK-NEXT:    %[[CMP1_A:.*]] = icmp eq i32 %[[TMP_A]], 0
+; CHECK-NEXT:    br i1 %[[CMP1_A]], label %[[BODY_A_A:.*]], label %[[DISPATCH_A:.*]]
+;
+; CHECK:       [[DISPATCH_A]]:
+; CHECK-NEXT:    br label %[[BODY_A_A]]
+;
+; CHECK:       [[BODY_A_A]]:
+; CHECK-NEXT:    %[[TMP_A_PHI_A:.*]] = phi i32 [ 0, %[[HEADER_A]] ], [ %[[TMP_A]], %[[DISPATCH_A]] ]
+; CHECK-NEXT:    %[[TMP_A_A:.*]] = call i32 @a()
+; CHECK-NEXT:    %[[TMP_A_SUM_A:.*]] = add i32 %[[TMP_A_PHI_A]], %[[TMP_A_A]]
+; CHECK-NEXT:    br label %[[BODY_B_A:.*]]
+;
+; CHECK:       [[BODY_B_A]]:
+; CHECK-NEXT:    %[[TMP_B_PHI_A:.*]] = phi i32 [ %[[TMP_A_SUM_A]], %[[BODY_A_A]] ]
+; CHECK-NEXT:    %[[TMP_B_A:.*]] = call i32 @b()
+; CHECK-NEXT:    %[[TMP_B_SUM_A:.*]] = add i32 %[[TMP_B_PHI_A]], %[[TMP_B_A]]
+; CHECK-NEXT:    br label %[[LATCH_A:.*]]
+;
+; CHECK:       [[LATCH_A]]:
+; CHECK-NEXT:    %[[CMP2_A:.*]] = icmp slt i32 %[[TMP_B_SUM_A]], 42
+; CHECK:         br i1 %[[CMP2_A]], label %[[HEADER_A]], label %[[LOOP_EXIT_A:.*]]
+;
+; CHECK:       [[LOOP_EXIT_A]]:
+; CHECK-NEXT:    %[[LCSSA_A:.*]] = phi i32 [ %[[TMP_B_SUM_A]], %[[LATCH_A]] ]
+; CHECK-NEXT:    br label %loop.exit2
+
+body.b:
+  ; %dispatch appears twice: once for case 1 and once for case 2.
+  %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ]
+  %tmp.b = call i32 @b()
+  %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b
+  br label %latch
+; Unswitched 'b' loop.
+;
+; CHECK:       [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT:    br label %[[HEADER_B:.*]]
+;
+; CHECK:       [[HEADER_B]]:
+; CHECK-NEXT:    %[[TMP_B:.*]] = call i32 @d()
+; CHECK-NEXT:    %[[CMP1_B:.*]] = icmp eq i32 %[[TMP_B]], 0
+; CHECK-NEXT:    br i1 %[[CMP1_B]], label %[[BODY_A_B:.*]], label %[[DISPATCH_B:.*]]
+;
+; The dispatch block's target is captured here so that this function's checks
+; do not depend on a variable binding left over from an earlier function.
+; CHECK:       [[DISPATCH_B]]:
+; CHECK-NEXT:    br label %[[BODY_B_B:.*]]
+;
+; CHECK:       [[BODY_A_B]]:
+; CHECK-NEXT:    %[[TMP_A_PHI_B:.*]] = phi i32 [ 0, %[[HEADER_B]] ]
+; CHECK-NEXT:    %[[TMP_A_B:.*]] = call i32 @a()
+; CHECK-NEXT:    %[[TMP_A_SUM_B:.*]] = add i32 %[[TMP_A_PHI_B]], %[[TMP_A_B]]
+; CHECK-NEXT:    br label %[[BODY_B_B:.*]]
+;
+; CHECK:       [[BODY_B_B]]:
+; CHECK-NEXT:    %[[TMP_B_PHI_B:.*]] = phi i32 [ %[[TMP_B]], %[[DISPATCH_B]] ], [ %[[TMP_A_SUM_B]], %[[BODY_A_B]] ]
+; CHECK-NEXT:    %[[TMP_B_B:.*]] = call i32 @b()
+; CHECK-NEXT:    %[[TMP_B_SUM_B:.*]] = add i32 %[[TMP_B_PHI_B]], %[[TMP_B_B]]
+; CHECK-NEXT:    br label %[[LATCH_B:.*]]
+;
+; CHECK:       [[LATCH_B]]:
+; CHECK-NEXT:    %[[CMP2_B:.*]] = icmp slt i32 %[[TMP_B_SUM_B]], 42
+; CHECK:         br i1 %[[CMP2_B]], label %[[HEADER_B]], label %[[LOOP_EXIT_B:.*]]
+;
+; CHECK:       [[LOOP_EXIT_B]]:
+; CHECK-NEXT:    %[[LCSSA_B:.*]] = phi i32 [ %[[TMP_B_SUM_B]], %[[LATCH_B]] ]
+; CHECK-NEXT:    br label %[[LOOP_EXIT2_SPLIT:.*]]
+
+latch:
+  ; The loop repeats while the accumulated sum is below 42 (signed compare).
+  %cmp2 = icmp slt i32 %tmp.b.sum, 42
+  br i1 %cmp2, label %header, label %loop.exit2
+
+loop.exit1:
+  ; Only predecessor is %dispatch, yet the phi has two entries because the
+  ; switch reaches this block through two edges.
+  %l1.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ]
+  br label %exit
+; Unswitched 'exit' loop.
+;
+; CHECK:       [[ENTRY_SPLIT_EXIT]]:
+; CHECK-NEXT:    br label %[[HEADER_EXIT:.*]]
+;
+; CHECK:       [[HEADER_EXIT]]:
+; CHECK-NEXT:    %[[TMP_EXIT:.*]] = call i32 @d()
+; CHECK-NEXT:    %[[CMP1_EXIT:.*]] = icmp eq i32 %[[TMP_EXIT]], 0
+; CHECK-NEXT:    br i1 %[[CMP1_EXIT]], label %[[BODY_A_EXIT:.*]], label %[[DISPATCH_EXIT:.*]]
+;
+; CHECK:       [[DISPATCH_EXIT]]:
+; CHECK-NEXT:    %[[TMP_LCSSA:.*]] = phi i32 [ %[[TMP_EXIT]], %[[HEADER_EXIT]] ]
+; CHECK-NEXT:    br label %loop.exit1
+;
+; CHECK:       [[BODY_A_EXIT]]:
+; CHECK-NEXT:    %[[TMP_A_PHI_EXIT:.*]] = phi i32 [ 0, %[[HEADER_EXIT]] ]
+; CHECK-NEXT:    %[[TMP_A_EXIT:.*]] = call i32 @a()
+; CHECK-NEXT:    %[[TMP_A_SUM_EXIT:.*]] = add i32 %[[TMP_A_PHI_EXIT]], %[[TMP_A_EXIT]]
+; CHECK-NEXT:    br label %[[BODY_B_EXIT:.*]]
+;
+; CHECK:       [[BODY_B_EXIT]]:
+; CHECK-NEXT:    %[[TMP_B_PHI_EXIT:.*]] = phi i32 [ %[[TMP_A_SUM_EXIT]], %[[BODY_A_EXIT]] ]
+; CHECK-NEXT:    %[[TMP_B_EXIT:.*]] = call i32 @b()
+; CHECK-NEXT:    %[[TMP_B_SUM_EXIT:.*]] = add i32 %[[TMP_B_PHI_EXIT]], %[[TMP_B_EXIT]]
+; CHECK-NEXT:    br label %[[LATCH_EXIT:.*]]
+;
+; CHECK:       [[LATCH_EXIT]]:
+; CHECK-NEXT:    %[[CMP2_EXIT:.*]] = icmp slt i32 %[[TMP_B_SUM_EXIT]], 42
+; CHECK:         br i1 %[[CMP2_EXIT]], label %[[HEADER_EXIT]], label %[[LOOP_EXIT_EXIT:.*]]
+;
+; CHECK:       loop.exit1:
+; CHECK-NEXT:    %[[L1_PHI:.*]] = phi i32 [ %[[TMP_LCSSA]], %[[DISPATCH_EXIT]] ]
+; CHECK-NEXT:    br label %exit
+;
+; CHECK:       [[LOOP_EXIT_EXIT]]:
+; CHECK-NEXT:    %[[L2_PHI:.*]] = phi i32 [ %[[TMP_B_SUM_EXIT]], %[[LATCH_EXIT]] ]
+; CHECK-NEXT:    br label %[[LOOP_EXIT2_SPLIT]]
+
+loop.exit2:
+  %l2.phi = phi i32 [ %tmp.b.sum, %latch ]
+  br label %exit
+; The 'b' and 'exit' loop exits are expected to merge in an intermediate block
+; first; the 'a' loop exit joins them in %loop.exit2.
+; CHECK:       [[LOOP_EXIT2_SPLIT]]:
+; CHECK-NEXT:    %[[LOOP_EXIT_PHI1:.*]] = phi i32 [ %[[L2_PHI]], %[[LOOP_EXIT_EXIT]] ], [ %[[LCSSA_B]], %[[LOOP_EXIT_B]] ]
+; CHECK-NEXT:    br label %loop.exit2
+;
+; CHECK:       loop.exit2:
+; CHECK-NEXT:    %[[LOOP_EXIT_PHI2:.*]] = phi i32 [ %[[LOOP_EXIT_PHI1]], %[[LOOP_EXIT2_SPLIT]] ], [ %[[LCSSA_A]], %[[LOOP_EXIT_A]] ]
+; CHECK-NEXT:    br label %exit
+
+exit:
+  %l.phi = phi i32 [ %l1.phi, %loop.exit1 ], [ %l2.phi, %loop.exit2 ]
+  ret i32 %l.phi
+; CHECK:       exit:
+; CHECK-NEXT:    %[[EXIT_PHI:.*]] = phi i32 [ %[[L1_PHI]], %loop.exit1 ], [ %[[LOOP_EXIT_PHI2]], %loop.exit2 ]
+; CHECK-NEXT:    ret i32 %[[EXIT_PHI]]
+}
+
+; Unswitch will not actually change the loop nest from:
+;   A < B < C
+define void @hoist_inner_loop0() {
+; CHECK-LABEL: define void @hoist_inner_loop0(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %v1 = call i1 @cond()
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[C_HEADER_US:.*]]
+;
+; CHECK:       [[C_HEADER_US]]:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %[[B_LATCH_SPLIT_US:.*]]
+;
+; CHECK:       [[B_LATCH_SPLIT_US]]:
+; CHECK-NEXT:    br label %b.latch
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  call i32 @c()
+  br i1 %v1, label %b.latch, label %c.latch
+; CHECK:       c.header:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %c.latch
+
+c.latch:
+  %v2 = call i1 @cond()
+  br i1 %v2, label %c.header, label %b.latch
+; CHECK:       c.latch:
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %c.header, label %[[B_LATCH_SPLIT:.*]]
+
+b.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %b.header, label %a.latch
+; CHECK:       [[B_LATCH_SPLIT]]:
+; CHECK-NEXT:    br label %b.latch
+;
+; CHECK:       b.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+  br label %a.header
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Unswitch will transform the loop nest from:
+;   A < B < C
+; into
+;   A < (B, C)
+define void @hoist_inner_loop1(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop1(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr
+  %v1 = call i1 @cond()
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[C_HEADER_US:.*]]
+;
+; CHECK:       [[C_HEADER_US]]:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %[[B_LATCH_US:.*]]
+;
+; CHECK:       [[B_LATCH_US]]:
+; CHECK-NEXT:    br label %b.latch
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  call i32 @c()
+  br i1 %v1, label %b.latch, label %c.latch
+; CHECK:       c.header:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %c.latch
+
+c.latch:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr
+  store i32 %x.b, i32* %ptr
+  %v2 = call i1 @cond()
+  br i1 %v2, label %c.header, label %a.exit.c
+; CHECK:       c.latch:
+; CHECK-NEXT:    store i32 %x.a, i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %c.header, label %a.exit.c
+
+b.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %b.header, label %a.exit.b
+; CHECK:       b.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %b.header, label %a.exit.b
+
+a.exit.c:
+  br label %a.latch
+; CHECK:       a.exit.c:
+; CHECK-NEXT:    br label %a.latch
+
+a.exit.b:
+  br label %a.latch
+; CHECK:       a.exit.b:
+; CHECK-NEXT:    br label %a.latch
+
+a.latch:
+  br label %a.header
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Unswitch will transform the loop nest from:
+;   A < B < C
+; into
+;   (A < B), C
+define void @hoist_inner_loop2(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop2(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr
+  %v1 = call i1 @cond()
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[C_HEADER_US:.*]]
+;
+; CHECK:       [[C_HEADER_US]]:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %[[B_LATCH_US:.*]]
+;
+; CHECK:       [[B_LATCH_US]]:
+; CHECK-NEXT:    br label %b.latch
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  call i32 @c()
+  br i1 %v1, label %b.latch, label %c.latch
+; CHECK:       c.header:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %c.latch
+
+c.latch:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr
+  store i32 %x.b, i32* %ptr
+  %v2 = call i1 @cond()
+  br i1 %v2, label %c.header, label %exit
+; CHECK:       c.latch:
+; CHECK-NEXT:    store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %c.header, label %exit
+
+b.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %b.header, label %a.latch
+; CHECK:       b.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+  br label %a.header
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Same as @hoist_inner_loop2 but with a nested loop inside the hoisted loop.
+; Unswitch will transform the loop nest from:
+;   A < B < C < D
+; into
+;   (A < B), (C < D)
+define void @hoist_inner_loop3(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop3(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr
+  %v1 = call i1 @cond()
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %[[B_HEADER_SPLIT_US:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[C_HEADER_US:.*]]
+;
+; CHECK:       [[C_HEADER_US]]:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %[[B_LATCH_US:.*]]
+;
+; CHECK:       [[B_LATCH_US]]:
+; CHECK-NEXT:    br label %b.latch
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  call i32 @c()
+  br i1 %v1, label %b.latch, label %c.body
+; CHECK:       c.header:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %c.body
+
+c.body:
+  %x.c = load i32, i32* %ptr
+  br label %d.header
+; CHECK:       c.body:
+; CHECK-NEXT:    %x.c = load i32, i32* %ptr
+; CHECK-NEXT:    br label %d.header
+
+d.header:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr
+  store i32 %x.b, i32* %ptr
+  store i32 %x.c, i32* %ptr
+  %v2 = call i1 @cond()
+  br i1 %v2, label %d.header, label %c.latch
+; CHECK:       d.header:
+; CHECK-NEXT:    store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %x.c, i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %d.header, label %c.latch
+
+c.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %c.header, label %exit
+; CHECK:       c.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %c.header, label %exit
+
+b.latch:
+  %v4 = call i1 @cond()
+  br i1 %v4, label %b.header, label %a.latch
+; CHECK:       b.latch:
+; CHECK-NEXT:    %v4 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v4, label %b.header, label %a.latch
+
+a.latch:
+  br label %a.header
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; This test is designed to exercise checking multiple remaining exits from the
+; loop being unswitched.
+; Unswitch will transform the loop nest from:
+;   A < B < C < D
+; into
+;   A < B < (C, D)
+define void @hoist_inner_loop4() {
+; CHECK-LABEL: define void @hoist_inner_loop4(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  %v1 = call i1 @cond()
+  br label %d.header
+; CHECK:       c.header:
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %[[C_HEADER_SPLIT_US:.*]], label %[[C_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[C_HEADER_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[D_HEADER_US:.*]]
+;
+; CHECK:       [[D_HEADER_US]]:
+; CHECK-NEXT:    call i32 @d()
+; CHECK-NEXT:    br label %[[C_LATCH_US:.*]]
+;
+; CHECK:       [[C_LATCH_US]]:
+; CHECK-NEXT:    br label %c.latch
+;
+; CHECK:       [[C_HEADER_SPLIT]]:
+; CHECK-NEXT:    br label %d.header
+
+d.header:
+  call i32 @d()
+  br i1 %v1, label %c.latch, label %d.exiting1
+; CHECK:       d.header:
+; CHECK-NEXT:    call i32 @d()
+; CHECK-NEXT:    br label %d.exiting1
+
+d.exiting1:
+  %v2 = call i1 @cond()
+  br i1 %v2, label %d.exiting2, label %a.latch
+; CHECK:       d.exiting1:
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %d.exiting2, label %a.latch
+
+d.exiting2:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %d.exiting3, label %loopexit.d
+; CHECK:       d.exiting2:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %d.exiting3, label %loopexit.d
+
+d.exiting3:
+  %v4 = call i1 @cond()
+  br i1 %v4, label %d.latch, label %b.latch
+; CHECK:       d.exiting3:
+; CHECK-NEXT:    %v4 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v4, label %d.latch, label %b.latch
+
+d.latch:
+  br label %d.header
+; CHECK:       d.latch:
+; CHECK-NEXT:    br label %d.header
+
+c.latch:
+  %v5 = call i1 @cond()
+  br i1 %v5, label %c.header, label %loopexit.c
+; CHECK:       c.latch:
+; CHECK-NEXT:    %v5 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v5, label %c.header, label %loopexit.c
+
+b.latch:
+  br label %b.header
+; CHECK:       b.latch:
+; CHECK-NEXT:    br label %b.header
+
+a.latch:
+  br label %a.header
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+loopexit.d:
+  br label %exit
+; CHECK:       loopexit.d:
+; CHECK-NEXT:    br label %exit
+
+loopexit.c:
+  br label %exit
+; CHECK:       loopexit.c:
+; CHECK-NEXT:    br label %exit
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Unswitch will transform the loop nest from:
+;   A < B < C < D
+; into
+;   A < ((B < C), D)
+define void @hoist_inner_loop5(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop5(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  %x.c = load i32, i32* %ptr
+  %v1 = call i1 @cond()
+  br label %d.header
+; CHECK:       c.header:
+; CHECK-NEXT:    %x.c = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %[[C_HEADER_SPLIT_US:.*]], label %[[C_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[C_HEADER_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[D_HEADER_US:.*]]
+;
+; CHECK:       [[D_HEADER_US]]:
+; CHECK-NEXT:    call i32 @d()
+; CHECK-NEXT:    br label %[[C_LATCH_US:.*]]
+;
+; CHECK:       [[C_LATCH_US]]:
+; CHECK-NEXT:    br label %c.latch
+;
+; CHECK:       [[C_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %c.header ]
+; CHECK-NEXT:    %[[X_C_LCSSA:.*]] = phi i32 [ %x.c, %c.header ]
+; CHECK-NEXT:    br label %d.header
+
+d.header:
+  call i32 @d()
+  br i1 %v1, label %c.latch, label %d.latch
+; CHECK:       d.header:
+; CHECK-NEXT:    call i32 @d()
+; CHECK-NEXT:    br label %d.latch
+
+d.latch:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr
+  store i32 %x.b, i32* %ptr
+  store i32 %x.c, i32* %ptr
+  %v2 = call i1 @cond()
+  br i1 %v2, label %d.header, label %a.latch
+; CHECK:       d.latch:
+; CHECK-NEXT:    store i32 %x.a, i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_C_LCSSA]], i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %d.header, label %a.latch
+
+c.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %c.header, label %b.latch
+; CHECK:       c.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %c.header, label %b.latch
+
+b.latch:
+  br label %b.header
+; CHECK:       b.latch:
+; CHECK-NEXT:    br label %b.header
+
+a.latch:
+  br label %a.header
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+define void @hoist_inner_loop_switch(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop_switch(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr
+  %v1 = call i32 @cond.i32()
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i32 @cond.i32()
+; CHECK-NEXT:    switch i32 %v1, label %[[B_HEADER_SPLIT:.*]] [
+; CHECK-NEXT:      i32 1, label %[[B_HEADER_SPLIT_US:.*]]
+; CHECK-NEXT:      i32 2, label %[[B_HEADER_SPLIT_US]]
+; CHECK-NEXT:      i32 3, label %[[B_HEADER_SPLIT_US]]
+; CHECK-NEXT:    ]
+;
+; CHECK:       [[B_HEADER_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[C_HEADER_US:.*]]
+;
+; CHECK:       [[C_HEADER_US]]:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %[[B_LATCH_US:.*]]
+;
+; CHECK:       [[B_LATCH_US]]:
+; CHECK-NEXT:    br label %b.latch
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  call i32 @c()
+  switch i32 %v1, label %c.latch [
+    i32 1, label %b.latch
+    i32 2, label %b.latch
+    i32 3, label %b.latch
+  ]
+; CHECK:       c.header:
+; CHECK-NEXT:    call i32 @c()
+; CHECK-NEXT:    br label %c.latch
+
+c.latch:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr
+  store i32 %x.b, i32* %ptr
+  %v2 = call i1 @cond()
+  br i1 %v2, label %c.header, label %exit
+; CHECK:       c.latch:
+; CHECK-NEXT:    store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %c.header, label %exit
+
+b.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %b.header, label %a.latch
+; CHECK:       b.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+  br label %a.header
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; A devilish pattern. This is a crafty, crafty test case designed to risk
+; creating indirect cycles with trivial and non-trivial unswitching. The inner
+; loop has a switch with a trivial exit edge that can be unswitched, but the
+; rest of the switch cannot be unswitched because its cost is too high.
+; However, the unswitching of the trivial edge creates a new switch in the
+; outer loop. *This* switch isn't trivial, but has a low cost to unswitch. When
+; we unswitch this switch from the outer loop, we will remove it completely and
+; create a clone of the inner loop on one side. This clone will then again be
+; viable for unswitching the inner-most loop. This lets us check that the
+; unswitching doesn't end up cycling infinitely even when the cycle is
+; indirect and due to revisiting a loop after cloning.
+define void @test31(i32 %arg) {
+; CHECK-LABEL: define void @test31(
+entry:
+  br label %outer.header
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %arg, label %[[ENTRY_SPLIT:.*]] [
+; CHECK-NEXT:      i32 1, label %[[ENTRY_SPLIT_US:.*]]
+; CHECK-NEXT:      i32 2, label %[[ENTRY_SPLIT_US]]
+; CHECK-NEXT:    ]
+;
+; CHECK:       [[ENTRY_SPLIT_US]]:
+; CHECK-NEXT:    switch i32 %arg, label %[[ENTRY_SPLIT_US_SPLIT:.*]] [
+; CHECK-NEXT:      i32 1, label %[[ENTRY_SPLIT_US_SPLIT_US:.*]]
+; CHECK-NEXT:    ]
+
+outer.header:
+  br label %inner.header
+
+inner.header:
+  switch i32 %arg, label %inner.loopexit1 [
+    i32 1, label %inner.body1
+    i32 2, label %inner.body2
+  ]
+
+inner.body1:
+  %a = call i32 @a()
+  br label %inner.latch
+; The (super convoluted) fully unswitched loop around `@a`.
+;
+; CHECK:       [[ENTRY_SPLIT_US_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[OUTER_HEADER_US_US:.*]]
+;
+; CHECK:       [[OUTER_HEADER_US_US]]:
+; CHECK-NEXT:    br label %[[OUTER_HEADER_SPLIT_US_US:.*]]
+;
+; CHECK:       [[OUTER_LATCH_US_US:.*]]:
+; CHECK-NEXT:    %[[OUTER_COND_US_US:.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 %[[OUTER_COND_US_US]], label %[[OUTER_HEADER_US_US]], label %[[EXIT_SPLIT_US_SPLIT_US:.*]]
+;
+; CHECK:       [[OUTER_HEADER_SPLIT_US_US]]:
+; CHECK-NEXT:    br label %[[OUTER_HEADER_SPLIT_SPLIT_US_US_US:.*]]
+;
+; CHECK:       [[INNER_LOOPEXIT2_US_US:.*]]:
+; CHECK-NEXT:    br label %[[OUTER_LATCH_US_US]]
+;
+; CHECK:       [[OUTER_HEADER_SPLIT_SPLIT_US_US_US]]:
+; CHECK-NEXT:    br label %[[INNER_HEADER_US_US_US:.*]]
+;
+; CHECK:       [[INNER_HEADER_US_US_US]]:
+; CHECK-NEXT:    br label %[[INNER_BODY1_US_US_US:.*]]
+;
+; CHECK:       [[INNER_BODY1_US_US_US]]:
+; CHECK-NEXT:    %[[A:.*]] = call i32 @a()
+; CHECK-NEXT:    br label %[[INNER_LATCH_US_US_US:.*]]
+;
+; CHECK:       [[INNER_LATCH_US_US_US]]:
+; CHECK-NEXT:    %[[PHI_A:.*]] = phi i32 [ %[[A]], %[[INNER_BODY1_US_US_US]] ]
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 %[[PHI_A]])
+; CHECK-NEXT:    %[[INNER_COND_US_US_US:.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 %[[INNER_COND_US_US_US]], label %[[INNER_HEADER_US_US_US]], label %[[INNER_LOOPEXIT2_SPLIT_US_US_US:.*]]
+;
+; CHECK:       [[INNER_LOOPEXIT2_SPLIT_US_US_US]]:
+; CHECK-NEXT:    br label %[[INNER_LOOPEXIT2_US_US]]
+;
+; CHECK:       [[EXIT_SPLIT_US_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[EXIT_SPLIT_US:.*]]
+
+
+inner.body2:
+  %b = call i32 @b()
+  br label %inner.latch
+; The fully unswitched loop around `@b`.
+;
+; CHECK:       [[ENTRY_SPLIT_US_SPLIT]]:
+; CHECK-NEXT:    br label %[[OUTER_HEADER_US:.*]]
+;
+; CHECK:       [[OUTER_HEADER_US]]:
+; CHECK-NEXT:    br label %[[OUTER_HEADER_SPLIT_US:.*]]
+;
+; CHECK:       [[INNER_HEADER_US:.*]]:
+; CHECK-NEXT:    br label %[[INNER_BODY2_US:.*]]
+;
+; CHECK:       [[INNER_BODY2_US]]:
+; CHECK-NEXT:    %[[B:.*]] = call i32 @b()
+; CHECK-NEXT:    br label %[[INNER_LATCH_US:.*]]
+;
+; CHECK:       [[INNER_LATCH_US]]:
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 0)
+; CHECK-NEXT:    call void @sink1(i32 %[[B]])
+; CHECK-NEXT:    %[[INNER_COND_US:.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 %[[INNER_COND_US]], label %[[INNER_HEADER_US]], label %[[INNER_LOOPEXIT2_SPLIT_US:.*]]
+;
+; CHECK:       [[INNER_LOOPEXIT2_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[INNER_LOOPEXIT2_US:.*]]
+;
+; CHECK:       [[OUTER_LATCH_US:.*]]:
+; CHECK-NEXT:    %[[OUTER_COND_US:.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 %[[OUTER_COND_US]], label %[[OUTER_HEADER_US]], label %[[EXIT_SPLIT_US_SPLIT:.*]]
+;
+; CHECK:       [[OUTER_HEADER_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[OUTER_HEADER_SPLIT_SPLIT_US:.*]]
+;
+; CHECK:       [[OUTER_HEADER_SPLIT_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[INNER_HEADER_US]]
+;
+; CHECK:       [[INNER_LOOPEXIT2_US]]:
+; CHECK-NEXT:    br label %[[OUTER_LATCH_US]]
+;
+; CHECK:       [[EXIT_SPLIT_US]]:
+; CHECK-NEXT:    br label %exit
+
+inner.latch:
+  %phi = phi i32 [ %a, %inner.body1 ], [ %b, %inner.body2 ]
+  ; Make 10 junk calls here to ensure we're over the "50" cost threshold of
+  ; non-trivial unswitching for this inner switch.
+  call void @sink1(i32 0)
+  call void @sink1(i32 0)
+  call void @sink1(i32 0)
+  call void @sink1(i32 0)
+  call void @sink1(i32 0)
+  call void @sink1(i32 0)
+  call void @sink1(i32 0)
+  call void @sink1(i32 0)
+  call void @sink1(i32 0)
+  call void @sink1(i32 0)
+  call void @sink1(i32 %phi)
+  %inner.cond = call i1 @cond()
+  br i1 %inner.cond, label %inner.header, label %inner.loopexit2
+
+inner.loopexit1:
+  br label %outer.latch
+; The unswitched `loopexit1` path.
+;
+; CHECK:       [[ENTRY_SPLIT]]:
+; CHECK-NEXT:    br label %[[OUTER_HEADER:.*]]
+;
+; CHECK:       outer.header:
+; CHECK-NEXT:    br label %inner.loopexit1
+;
+; CHECK:       inner.loopexit1:
+; CHECK-NEXT:    br label %outer.latch
+;
+; CHECK:       outer.latch:
+; CHECK-NEXT:    %outer.cond = call i1 @cond()
+; CHECK-NEXT:    br i1 %outer.cond, label %outer.header, label %[[EXIT_SPLIT:.*]]
+;
+; CHECK:       [[EXIT_SPLIT]]:
+; CHECK-NEXT:    br label %exit
+
+inner.loopexit2:
+  br label %outer.latch
+
+outer.latch:
+  %outer.cond = call i1 @cond()
+  br i1 %outer.cond, label %outer.header, label %exit
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/pr37888.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/pr37888.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/pr37888.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/pr37888.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,39 @@
+; RUN: opt -simple-loop-unswitch -loop-deletion -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-mssa-loop-dependency=true -verify-memoryssa -loop-deletion -S < %s | FileCheck %s
+;
+; Check that when we do unswitching where we re-enqueue the loop to be processed
+; again, but manage to delete the loop before ever getting to iterate on it, it
+; doesn't crash the legacy pass manager.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @pr37888() {
+; CHECK-LABEL: define void @pr37888()
+entry:
+  %tobool = icmp ne i16 undef, 0
+  br label %for.body
+; CHECK:         %[[TOBOOL:.*]] = icmp ne
+; CHECK-NEXT:    br i1 %[[TOBOOL]], label %if.then, label %[[ENTRY_SPLIT:.*]]
+;
+; CHECK:       [[ENTRY_SPLIT]]:
+; CHECK-NEXT:    br label %for.end
+
+for.body:
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+  unreachable
+; CHECK:       if.then:
+; CHECK-NEXT:    unreachable
+
+if.end:
+  br label %for.inc
+
+for.inc:
+  br i1 undef, label %for.body, label %for.end
+
+for.end:
+  ret void
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/preserve-analyses.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,130 @@
+; RUN: opt -simple-loop-unswitch -verify-loop-info -verify-dom-info -disable-output < %s
+; RUN: opt -simple-loop-unswitch -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output < %s
+
+; Loop unswitch should be able to unswitch these loops and
+; preserve LCSSA and LoopSimplify forms.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-apple-darwin9"
+
+@delim1 = external global i32                     ; <i32*> [#uses=1]
+@delim2 = external global i32                     ; <i32*> [#uses=1]
+
+define i32 @ineqn(i8* %s, i8* %p) nounwind readonly {
+entry:
+  %0 = load i32, i32* @delim1, align 4                 ; <i32> [#uses=1]
+  %1 = load i32, i32* @delim2, align 4                 ; <i32> [#uses=1]
+  br label %bb8.outer
+
+bb:                                               ; preds = %bb8
+  %2 = icmp eq i8* %p_addr.0, %s                  ; <i1> [#uses=1]
+  br i1 %2, label %bb10, label %bb2
+
+bb2:                                              ; preds = %bb
+  %3 = getelementptr inbounds i8, i8* %p_addr.0, i32 1 ; <i8*> [#uses=3]
+  switch i32 %ineq.0.ph, label %bb8.backedge [
+    i32 0, label %bb3
+    i32 1, label %bb6
+  ]
+
+bb8.backedge:                                     ; preds = %bb6, %bb5, %bb2
+  br label %bb8
+
+bb3:                                              ; preds = %bb2
+  %4 = icmp eq i32 %8, %0                         ; <i1> [#uses=1]
+  br i1 %4, label %bb8.outer.loopexit, label %bb5
+
+bb5:                                              ; preds = %bb3
+  br i1 %6, label %bb6, label %bb8.backedge
+
+bb6:                                              ; preds = %bb5, %bb2
+  %5 = icmp eq i32 %8, %1                         ; <i1> [#uses=1]
+  br i1 %5, label %bb7, label %bb8.backedge
+
+bb7:                                              ; preds = %bb6
+  %.lcssa1 = phi i8* [ %3, %bb6 ]                 ; <i8*> [#uses=1]
+  br label %bb8.outer.backedge
+
+bb8.outer.backedge:                               ; preds = %bb8.outer.loopexit, %bb7
+  %.lcssa2 = phi i8* [ %.lcssa1, %bb7 ], [ %.lcssa, %bb8.outer.loopexit ] ; <i8*> [#uses=1]
+  %ineq.0.ph.be = phi i32 [ 0, %bb7 ], [ 1, %bb8.outer.loopexit ] ; <i32> [#uses=1]
+  br label %bb8.outer
+
+bb8.outer.loopexit:                               ; preds = %bb3
+  %.lcssa = phi i8* [ %3, %bb3 ]                  ; <i8*> [#uses=1]
+  br label %bb8.outer.backedge
+
+bb8.outer:                                        ; preds = %bb8.outer.backedge, %entry
+  %ineq.0.ph = phi i32 [ 0, %entry ], [ %ineq.0.ph.be, %bb8.outer.backedge ] ; <i32> [#uses=3]
+  %p_addr.0.ph = phi i8* [ %p, %entry ], [ %.lcssa2, %bb8.outer.backedge ] ; <i8*> [#uses=1]
+  %6 = icmp eq i32 %ineq.0.ph, 1                  ; <i1> [#uses=1]
+  br label %bb8
+
+bb8:                                              ; preds = %bb8.outer, %bb8.backedge
+  %p_addr.0 = phi i8* [ %p_addr.0.ph, %bb8.outer ], [ %3, %bb8.backedge ] ; <i8*> [#uses=3]
+  %7 = load i8, i8* %p_addr.0, align 1                ; <i8> [#uses=2]
+  %8 = sext i8 %7 to i32                          ; <i32> [#uses=2]
+  %9 = icmp eq i8 %7, 0                           ; <i1> [#uses=1]
+  br i1 %9, label %bb10, label %bb
+
+bb10:                                             ; preds = %bb8, %bb
+  %.0 = phi i32 [ %ineq.0.ph, %bb ], [ 0, %bb8 ]  ; <i32> [#uses=1]
+  ret i32 %.0
+}
+
+; This is a simplified form of ineqn from above. It triggers some
+; different cases in the loop-unswitch code.
+
+define void @simplified_ineqn() nounwind readonly {
+entry:
+  br label %bb8.outer
+
+bb8.outer:                                        ; preds = %bb6, %bb2, %entry
+  %x = phi i32 [ 0, %entry ], [ 0, %bb6 ], [ 1, %bb2 ] ; <i32> [#uses=1]
+  br i1 undef, label %return, label %bb2
+
+bb2:                                              ; preds = %bb6, %bb8.outer
+  switch i32 %x, label %bb6 [
+    i32 0, label %bb8.outer
+  ]
+
+bb6:                                              ; preds = %bb2
+  br i1 undef, label %bb8.outer, label %bb2
+
+return:                                             ; preds = %bb8.outer
+  ret void
+}
+
+; This function requires special handling to preserve LCSSA form.
+; PR4934
+
+define void @pnp_check_irq() nounwind noredzone {
+entry:
+  %conv56 = trunc i64 undef to i32                ; <i32> [#uses=1]
+  br label %while.cond.i
+
+while.cond.i:                                     ; preds = %while.cond.i.backedge, %entry
+  %call.i25 = call i8* @pci_get_device() nounwind noredzone ; <i8*> [#uses=2]
+  br i1 undef, label %if.then65, label %while.body.i
+
+while.body.i:                                     ; preds = %while.cond.i
+  br i1 undef, label %if.then31.i.i, label %while.cond.i.backedge
+
+while.cond.i.backedge:                            ; preds = %if.then31.i.i, %while.body.i
+  br label %while.cond.i
+
+if.then31.i.i:                                    ; preds = %while.body.i
+  switch i32 %conv56, label %while.cond.i.backedge [
+    i32 14, label %if.then42.i.i
+    i32 15, label %if.then42.i.i
+  ]
+
+if.then42.i.i:                                    ; preds = %if.then31.i.i, %if.then31.i.i
+  %call.i25.lcssa48 = phi i8* [ %call.i25, %if.then31.i.i ], [ %call.i25, %if.then31.i.i ] ; <i8*> [#uses=0]
+  unreachable
+
+if.then65:                                        ; preds = %while.cond.i
+  unreachable
+}
+
+declare i8* @pci_get_device() noredzone

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-iteration.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,42 @@
+; RUN: opt -passes='loop(loop-instsimplify,simplify-cfg,unswitch),verify<loops>' -S < %s | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(loop-instsimplify,simplify-cfg,unswitch),verify<loops>' -S < %s | FileCheck %s
+
+declare void @some_func() noreturn
+
+define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) {  ; %cond1 and %cond2 are function arguments, hence loop-invariant
+; CHECK-LABEL: @test1(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split.split, label %loop_exit
+;
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br i1 %cond1, label %continue, label %loop_exit ; first trivial condition
+
+continue:
+  %var_val = load i32, i32* %var
+  %var_cond = trunc i32 %var_val to i1
+  %maybe_cond = select i1 %cond1, i1 %cond2, i1 %var_cond  ; yields %cond2 whenever %cond1 is true
+  br i1 %maybe_cond, label %do_something, label %loop_exit ; second trivial condition
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       loop_begin:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  ret i32 0  ; single exit block shared by both conditions
+; CHECK:       loop_exit:
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    ret
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,1245 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
+
+declare void @some_func() noreturn
+declare void @sink(i32)
+
+declare i1 @cond()
+declare i32 @cond.i32()
+
+; This test contains two trivial unswitch conditions in one loop.
+; LoopUnswitch pass should be able to unswitch the second one
+; after unswitching the first one.
+define i32 @test1(i32* %var, i1 %cond1, i1 %cond2) {  ; both exit tests use function arguments directly
+; CHECK-LABEL: @test1(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split.split, label %loop_exit
+;
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br i1 %cond1, label %continue, label %loop_exit	; first trivial condition
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %continue
+
+continue:
+  %var_val = load i32, i32* %var  ; result is unused; the expected output still contains the load
+  br i1 %cond2, label %do_something, label %loop_exit	; second trivial condition
+; CHECK:       continue:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    br label %do_something
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       do_something:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  ret i32 0  ; single exit block shared by both conditions
+; CHECK:       loop_exit:
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    ret
+}
+
+; Test for two trivially unswitchable switches.
+define i32 @test3(i32* %var, i32 %cond1, i32 %cond2) {  ; both switch operands are function arguments
+; CHECK-LABEL: @test3(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond1, label %entry.split [
+; CHECK-NEXT:      i32 0, label %loop_exit1
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    switch i32 %cond2, label %loop_exit2 [
+; CHECK-NEXT:      i32 42, label %loop_exit2
+; CHECK-NEXT:      i32 0, label %entry.split.split
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  switch i32 %cond1, label %continue [  ; only case 0 leaves the loop
+    i32 0, label %loop_exit1
+  ]
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %continue
+
+continue:
+  %var_val = load i32, i32* %var
+  switch i32 %cond2, label %loop_exit2 [  ; only case 0 stays in the loop
+    i32 0, label %do_something
+    i32 42, label %loop_exit2  ; same target as the default
+  ]
+; CHECK:       continue:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    br label %do_something
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       do_something:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit1:
+  ret i32 0
+; CHECK:       loop_exit1:
+; CHECK-NEXT:    ret
+
+loop_exit2:
+  ret i32 0
+; CHECK:       loop_exit2:
+; CHECK-NEXT:    ret
+;
+; We shouldn't have any unreachable blocks here because the unswitched switches
+; turn into branches instead.
+; CHECK-NOT:     unreachable
+}
+
+; Test for a trivially unswitchable switch with multiple exiting cases and
+; multiple looping cases.
+define i32 @test4(i32* %var, i32 %cond1, i32 %cond2) {  ; %cond1 is not used in the body; the switch is on %cond2
+; CHECK-LABEL: @test4(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond2, label %loop_exit2 [
+; CHECK-NEXT:      i32 13, label %loop_exit1
+; CHECK-NEXT:      i32 42, label %loop_exit3
+; CHECK-NEXT:      i32 0, label %entry.split
+; CHECK-NEXT:      i32 1, label %entry.split
+; CHECK-NEXT:      i32 2, label %entry.split
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %var_val = load i32, i32* %var
+  switch i32 %cond2, label %loop_exit2 [  ; default exits the loop
+    i32 0, label %loop0  ; stays in the loop
+    i32 1, label %loop1  ; stays in the loop
+    i32 13, label %loop_exit1  ; exits the loop
+    i32 2, label %loop2  ; stays in the loop
+    i32 42, label %loop_exit3  ; exits the loop
+  ]
+; CHECK:       loop_begin:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    switch i32 %cond2, label %loop2 [
+; CHECK-NEXT:      i32 0, label %loop0
+; CHECK-NEXT:      i32 1, label %loop1
+; CHECK-NEXT:    ]
+
+loop0:
+  call void @some_func() noreturn nounwind
+  br label %loop_latch
+; CHECK:       loop0:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_latch
+
+loop1:
+  call void @some_func() noreturn nounwind
+  br label %loop_latch
+; CHECK:       loop1:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_latch
+
+loop2:
+  call void @some_func() noreturn nounwind
+  br label %loop_latch
+; CHECK:       loop2:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_latch
+
+loop_latch:
+  br label %loop_begin  ; back edge shared by loop0, loop1 and loop2
+; CHECK:       loop_latch:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit1:
+  ret i32 0
+; CHECK:       loop_exit1:
+; CHECK-NEXT:    ret
+
+loop_exit2:
+  ret i32 0
+; CHECK:       loop_exit2:
+; CHECK-NEXT:    ret
+
+loop_exit3:
+  ret i32 0
+; CHECK:       loop_exit3:
+; CHECK-NEXT:    ret
+}
+
+; This test contains a trivially unswitchable branch with an LCSSA phi node in
+; a loop exit block.
+define i32 @test5(i1 %cond1, i32 %x, i32 %y) {  ; the exit phis only carry the arguments %x and %y
+; CHECK-LABEL: @test5(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br i1 %cond1, label %latch, label %loop_exit  ; the only exiting branch, on an argument
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %latch
+
+latch:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       latch:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %result1 = phi i32 [ %x, %loop_begin ]  ; single-entry phi; expected output re-points it at %entry
+  %result2 = phi i32 [ %y, %loop_begin ]  ; likewise
+  %result = add i32 %result1, %result2
+  ret i32 %result
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %x, %entry ]
+; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %y, %entry ]
+; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1]], %[[R2]]
+; CHECK-NEXT:    ret i32 %[[R]]
+}
+
+; This test contains a trivially unswitchable branch with a real phi node in LCSSA
+; position in a shared exit block where a different path through the loop
+; produces a non-invariant input to the PHI node.
+define i32 @test6(i32* %var, i1 %cond1, i1 %cond2, i32 %x, i32 %y) {  ; loop_exit has two in-loop predecessors: loop_begin and continue
+; CHECK-LABEL: @test6(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %{{.*}}, label %entry.split, label %loop_exit.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br i1 %cond1, label %continue, label %loop_exit  ; this exit feeds %x / %y into the phis
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %continue
+
+continue:
+  %var_val = load i32, i32* %var  ; loaded inside the loop
+  br i1 %cond2, label %latch, label %loop_exit  ; the expected output keeps this branch in the loop
+; CHECK:       continue:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    br i1 %cond2, label %latch, label %loop_exit
+
+latch:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       latch:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %result1 = phi i32 [ %x, %loop_begin ], [ %var_val, %continue ]  ; argument first, loaded value second
+  %result2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ]  ; same sources, opposite operand order
+  %result = add i32 %result1, %result2
+  ret i32 %result
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %var_val, %continue ]
+; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %var_val, %continue ]
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %[[R1]], %loop_exit ]
+; CHECK-NEXT:    %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %[[R2]], %loop_exit ]
+; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]
+; CHECK-NEXT:    ret i32 %[[R]]
+}
+
+; This test contains a trivially unswitchable switch with an LCSSA phi node in
+; a loop exit block.
+define i32 @test7(i32 %cond1, i32 %x, i32 %y) {  ; two switch cases share one exit block
+; CHECK-LABEL: @test7(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond1, label %entry.split [
+; CHECK-NEXT:      i32 0, label %loop_exit
+; CHECK-NEXT:      i32 1, label %loop_exit
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  switch i32 %cond1, label %latch [  ; default stays in the loop
+    i32 0, label %loop_exit
+    i32 1, label %loop_exit
+  ]
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %latch
+
+latch:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       latch:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %result1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ]  ; one entry per switch edge from loop_begin
+  %result2 = phi i32 [ %y, %loop_begin ], [ %y, %loop_begin ]  ; likewise
+  %result = add i32 %result1, %result2
+  ret i32 %result
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ]
+; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ]
+; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1]], %[[R2]]
+; CHECK-NEXT:    ret i32 %[[R]]
+}
+
+; This test contains a trivially unswitchable switch with a real phi node in
+; LCSSA position in a shared exit block where a different path through the loop
+; produces a non-invariant input to the PHI node.
+define i32 @test8(i32* %var, i32 %cond1, i32 %cond2, i32 %x, i32 %y) {  ; first switch reaches two exit blocks; loop_exit is also reached from continue
+; CHECK-LABEL: @test8(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond1, label %entry.split [
+; CHECK-NEXT:      i32 0, label %loop_exit.split
+; CHECK-NEXT:      i32 1, label %loop_exit2
+; CHECK-NEXT:      i32 2, label %loop_exit.split
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  switch i32 %cond1, label %continue [  ; every listed case exits; default stays in the loop
+    i32 0, label %loop_exit
+    i32 1, label %loop_exit2
+    i32 2, label %loop_exit
+  ]
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %continue
+
+continue:
+  %var_val = load i32, i32* %var  ; loaded inside the loop
+  switch i32 %cond2, label %latch [  ; the expected output keeps this switch in the loop
+    i32 0, label %loop_exit
+  ]
+; CHECK:       continue:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    switch i32 %cond2, label %latch [
+; CHECK-NEXT:      i32 0, label %loop_exit
+; CHECK-NEXT:    ]
+
+latch:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       latch:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %result1.1 = phi i32 [ %x, %loop_begin ], [ %x, %loop_begin ], [ %var_val, %continue ]  ; two edges from loop_begin (cases 0 and 2), one from continue
+  %result1.2 = phi i32 [ %var_val, %continue ], [ %y, %loop_begin ], [ %y, %loop_begin ]  ; same edges, different operand order
+  %result1 = add i32 %result1.1, %result1.2
+  ret i32 %result1
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %var_val, %continue ]
+; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %var_val, %continue ]
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[R1S:.*]] = phi i32 [ %x, %entry ], [ %x, %entry ], [ %[[R1]], %loop_exit ]
+; CHECK-NEXT:    %[[R2S:.*]] = phi i32 [ %y, %entry ], [ %y, %entry ], [ %[[R2]], %loop_exit ]
+; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1S]], %[[R2S]]
+; CHECK-NEXT:    ret i32 %[[R]]
+
+loop_exit2:
+  %result2.1 = phi i32 [ %x, %loop_begin ]  ; reached only through case 1 of the first switch
+  %result2.2 = phi i32 [ %y, %loop_begin ]
+  %result2 = add i32 %result2.1, %result2.2
+  ret i32 %result2
+; CHECK:       loop_exit2:
+; CHECK-NEXT:    %[[R1:.*]] = phi i32 [ %x, %entry ]
+; CHECK-NEXT:    %[[R2:.*]] = phi i32 [ %y, %entry ]
+; CHECK-NEXT:    %[[R:.*]] = add i32 %[[R1]], %[[R2]]
+; CHECK-NEXT:    ret i32 %[[R]]
+}
+
+; This test, extracted from the LLVM test suite, has an interesting dominator
+; tree to update as there are edges to sibling domtree nodes within child
+; domtree nodes of the unswitched node.
+define void @xgets(i1 %cond1, i1* %cond2.ptr) {  ; loop: for.cond -> land.lhs.true -> if.then20 -> if.else -> for.cond
+; CHECK-LABEL: @xgets(
+entry:
+  br label %for.cond.preheader
+; CHECK:       entry:
+; CHECK-NEXT:    br label %for.cond.preheader
+
+for.cond.preheader:
+  br label %for.cond
+; CHECK:       for.cond.preheader:
+; CHECK-NEXT:    br i1 %cond1, label %for.cond.preheader.split, label %if.end17.thread.loopexit
+;
+; CHECK:       for.cond.preheader.split:
+; CHECK-NEXT:    br label %for.cond
+
+for.cond:
+  br i1 %cond1, label %land.lhs.true, label %if.end17.thread.loopexit  ; exit test on the argument %cond1
+; CHECK:       for.cond:
+; CHECK-NEXT:    br label %land.lhs.true
+
+land.lhs.true:
+  br label %if.then20
+; CHECK:       land.lhs.true:
+; CHECK-NEXT:    br label %if.then20
+
+if.then20:
+  %cond2 = load volatile i1, i1* %cond2.ptr  ; volatile load inside the loop
+  br i1 %cond2, label %if.then23, label %if.else  ; second exit; unchanged in the expected output
+; CHECK:       if.then20:
+; CHECK-NEXT:    %[[COND2:.*]] = load volatile i1, i1* %cond2.ptr
+; CHECK-NEXT:    br i1 %[[COND2]], label %if.then23, label %if.else
+
+if.else:
+  br label %for.cond  ; back edge
+; CHECK:       if.else:
+; CHECK-NEXT:    br label %for.cond
+
+if.end17.thread.loopexit:
+  br label %if.end17.thread
+; CHECK:       if.end17.thread.loopexit:
+; CHECK-NEXT:    br label %if.end17.thread
+
+if.end17.thread:
+  br label %cleanup
+; CHECK:       if.end17.thread:
+; CHECK-NEXT:    br label %cleanup
+
+if.then23:
+  br label %cleanup
+; CHECK:       if.then23:
+; CHECK-NEXT:    br label %cleanup
+
+cleanup:
+  ret void  ; both exit paths join here
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+}
+
+define i32 @test_partial_condition_unswitch_and(i32* %var, i1 %cond1, i1 %cond2) {  ; second exit test ANDs a loaded value with the argument %cond2
+; CHECK-LABEL: @test_partial_condition_unswitch_and(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond1, label %entry.split, label %loop_exit.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br i1 %cond2, label %entry.split.split, label %loop_exit
+;
+; CHECK:       entry.split.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  br i1 %cond1, label %continue, label %loop_exit  ; exit test directly on an argument
+; CHECK:       loop_begin:
+; CHECK-NEXT:    br label %continue
+
+continue:
+  %var_val = load i32, i32* %var
+  %var_cond = trunc i32 %var_val to i1  ; derived from the in-loop load
+  %cond_and = and i1 %var_cond, %cond2  ; the expected output replaces %cond2 with true here
+  br i1 %cond_and, label %do_something, label %loop_exit
+; CHECK:       continue:
+; CHECK-NEXT:    %[[VAR:.*]] = load i32
+; CHECK-NEXT:    %[[VAR_COND:.*]] = trunc i32 %[[VAR]] to i1
+; CHECK-NEXT:    %[[COND_AND:.*]] = and i1 %[[VAR_COND]], true
+; CHECK-NEXT:    br i1 %[[COND_AND]], label %do_something, label %loop_exit
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       do_something:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit:
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    ret
+}
+
+define i32 @test_partial_condition_unswitch_or(i32* %var, i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4, i1 %cond5, i1 %cond6) {  ; exit condition is a tree of ORs mixing arguments with a loaded value
+; CHECK-LABEL: @test_partial_condition_unswitch_or(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[INV_OR1:.*]] = or i1 %cond4, %cond2
+; CHECK-NEXT:    %[[INV_OR2:.*]] = or i1 %[[INV_OR1]], %cond3
+; CHECK-NEXT:    %[[INV_OR3:.*]] = or i1 %[[INV_OR2]], %cond1
+; CHECK-NEXT:    br i1 %[[INV_OR3]], label %loop_exit.split, label %entry.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %var_val = load i32, i32* %var
+  %var_cond = trunc i32 %var_val to i1  ; derived from the in-loop load
+  %cond_or1 = or i1 %var_cond, %cond1  ; expected output: %cond1 becomes false
+  %cond_or2 = or i1 %cond2, %cond3  ; expected output: both operands become false
+  %cond_or3 = or i1 %cond_or1, %cond_or2
+  %cond_xor1 = xor i1 %cond5, %var_cond  ; reached through xor, not or: expected output keeps %cond5
+  %cond_and1 = and i1 %cond6, %var_cond  ; reached through and, not or: expected output keeps %cond6
+  %cond_or4 = or i1 %cond_xor1, %cond_and1
+  %cond_or5 = or i1 %cond_or3, %cond_or4
+  %cond_or6 = or i1 %cond_or5, %cond4  ; expected output: %cond4 becomes false
+  br i1 %cond_or6, label %loop_exit, label %do_something  ; true exits the loop
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[VAR:.*]] = load i32
+; CHECK-NEXT:    %[[VAR_COND:.*]] = trunc i32 %[[VAR]] to i1
+; CHECK-NEXT:    %[[COND_OR1:.*]] = or i1 %[[VAR_COND]], false
+; CHECK-NEXT:    %[[COND_OR2:.*]] = or i1 false, false
+; CHECK-NEXT:    %[[COND_OR3:.*]] = or i1 %[[COND_OR1]], %[[COND_OR2]]
+; CHECK-NEXT:    %[[COND_XOR:.*]] = xor i1 %cond5, %[[VAR_COND]]
+; CHECK-NEXT:    %[[COND_AND:.*]] = and i1 %cond6, %[[VAR_COND]]
+; CHECK-NEXT:    %[[COND_OR4:.*]] = or i1 %[[COND_XOR]], %[[COND_AND]]
+; CHECK-NEXT:    %[[COND_OR5:.*]] = or i1 %[[COND_OR3]], %[[COND_OR4]]
+; CHECK-NEXT:    %[[COND_OR6:.*]] = or i1 %[[COND_OR5]], false
+; CHECK-NEXT:    br i1 %[[COND_OR6]], label %loop_exit, label %do_something
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       do_something:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  ret i32 0
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    ret
+}
+
+define i32 @test_partial_condition_unswitch_with_lcssa_phi1(i32* %var, i1 %cond, i32 %x) {  ; partial condition plus a single-entry phi of %x in the exit block
+; CHECK-LABEL: @test_partial_condition_unswitch_with_lcssa_phi1(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split, label %loop_exit.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %var_val = load i32, i32* %var
+  %var_cond = trunc i32 %var_val to i1  ; derived from the in-loop load
+  %cond_and = and i1 %var_cond, %cond  ; the expected output replaces %cond with true here
+  br i1 %cond_and, label %do_something, label %loop_exit
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[VAR:.*]] = load i32
+; CHECK-NEXT:    %[[VAR_COND:.*]] = trunc i32 %[[VAR]] to i1
+; CHECK-NEXT:    %[[COND_AND:.*]] = and i1 %[[VAR_COND]], true
+; CHECK-NEXT:    br i1 %[[COND_AND]], label %do_something, label %loop_exit
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br label %loop_begin  ; back edge
+; CHECK:       do_something:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br label %loop_begin
+
+loop_exit:
+  %x.lcssa = phi i32 [ %x, %loop_begin ]  ; kept in the expected output and fed into a new phi in loop_exit.split
+  ret i32 %x.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[LCSSA:.*]] = phi i32 [ %x, %loop_begin ]
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[LCSSA_SPLIT:.*]] = phi i32 [ %x, %entry ], [ %[[LCSSA]], %loop_exit ]
+; CHECK-NEXT:    ret i32 %[[LCSSA_SPLIT]]
+}
+
+define i32 @test_partial_condition_unswitch_with_lcssa_phi2(i32* %var, i1 %cond, i32 %x, i32 %y) {  ; like the phi1 variant, but do_something is a second exiting block
+; CHECK-LABEL: @test_partial_condition_unswitch_with_lcssa_phi2(
+entry:
+  br label %loop_begin
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 %cond, label %entry.split, label %loop_exit.split
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %loop_begin
+
+loop_begin:
+  %var_val = load i32, i32* %var
+  %var_cond = trunc i32 %var_val to i1  ; derived from the in-loop load
+  %cond_and = and i1 %var_cond, %cond  ; the expected output replaces %cond with true here
+  br i1 %cond_and, label %do_something, label %loop_exit
+; CHECK:       loop_begin:
+; CHECK-NEXT:    %[[VAR:.*]] = load i32
+; CHECK-NEXT:    %[[VAR_COND:.*]] = trunc i32 %[[VAR]] to i1
+; CHECK-NEXT:    %[[COND_AND:.*]] = and i1 %[[VAR_COND]], true
+; CHECK-NEXT:    br i1 %[[COND_AND]], label %do_something, label %loop_exit
+
+do_something:
+  call void @some_func() noreturn nounwind
+  br i1 %var_cond, label %loop_begin, label %loop_exit  ; back edge or second exit, decided by the loaded value
+; CHECK:       do_something:
+; CHECK-NEXT:    call
+; CHECK-NEXT:    br i1 %[[VAR_COND]], label %loop_begin, label %loop_exit
+
+loop_exit:
+  %xy.lcssa = phi i32 [ %x, %loop_begin ], [ %y, %do_something ]  ; one incoming value per exiting block
+  ret i32 %xy.lcssa
+; CHECK:       loop_exit:
+; CHECK-NEXT:    %[[LCSSA:.*]] = phi i32 [ %x, %loop_begin ], [ %y, %do_something ]
+; CHECK-NEXT:    br label %loop_exit.split
+;
+; CHECK:       loop_exit.split:
+; CHECK-NEXT:    %[[LCSSA_SPLIT:.*]] = phi i32 [ %x, %entry ], [ %[[LCSSA]], %loop_exit ]
+; CHECK-NEXT:    ret i32 %[[LCSSA_SPLIT]]
+}
+
+; Unswitch will not actually change the loop nest from:
+;   A < B < C
+define void @hoist_inner_loop0() {  ; %v1 is computed in b.header and tested in c.header
+; CHECK-LABEL: define void @hoist_inner_loop0(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %v1 = call i1 @cond()  ; defined outside the c loop
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %[[B_LATCH_SPLIT:.*]], label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  br i1 %v1, label %b.latch, label %c.latch  ; true leaves the c loop for b.latch
+; CHECK:       c.header:
+; CHECK-NEXT:    br label %c.latch
+
+c.latch:
+  %v2 = call i1 @cond()
+  br i1 %v2, label %c.header, label %b.latch  ; c back edge, or exit to the same b.latch
+; CHECK:       c.latch:
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %c.header, label %b.latch
+
+b.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %b.header, label %a.latch  ; b back edge, or fall out to a.latch
+; CHECK:       b.latch:
+; CHECK-NEXT:    br label %[[B_LATCH_SPLIT]]
+;
+; CHECK:       [[B_LATCH_SPLIT]]:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+  br label %a.header  ; a back edge
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:  ; no branch in this function targets this block
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Unswitch will transform the loop nest from:
+;   A < B < C
+; into
+;   A < (B, C)
+define void @hoist_inner_loop1(i32* %ptr) {  ; the c loop exits to a.exit.c, the b loop to a.exit.b; both lead to a.latch
+; CHECK-LABEL: define void @hoist_inner_loop1(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr  ; defined in the a loop, stored in c.latch
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr  ; defined in the b loop, stored in c.latch
+  %v1 = call i1 @cond()  ; defined outside the c loop, tested in c.header
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %b.latch, label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  br i1 %v1, label %b.latch, label %c.latch  ; true leaves the c loop for b.latch
+; CHECK:       c.header:
+; CHECK-NEXT:    br label %c.latch
+
+c.latch:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr  ; the expected output still uses %x.a directly
+  store i32 %x.b, i32* %ptr  ; the expected output reads %x.b through a new phi
+  %v2 = call i1 @cond()
+  br i1 %v2, label %c.header, label %a.exit.c  ; c back edge, or exit that bypasses b.latch
+; CHECK:       c.latch:
+; CHECK-NEXT:    store i32 %x.a, i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %c.header, label %a.exit.c
+
+b.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %b.header, label %a.exit.b  ; b back edge, or exit to a.exit.b
+; CHECK:       b.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %b.header, label %a.exit.b
+
+a.exit.c:
+  br label %a.latch
+; CHECK:       a.exit.c:
+; CHECK-NEXT:    br label %a.latch
+
+a.exit.b:
+  br label %a.latch
+; CHECK:       a.exit.b:
+; CHECK-NEXT:    br label %a.latch
+
+a.latch:
+  br label %a.header  ; a back edge
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:  ; no branch in this function targets this block
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Unswitch will transform the loop nest from:
+;   A < B < C
+; into
+;   (A < B), C
+define void @hoist_inner_loop2(i32* %ptr) {  ; the c loop's only other exit goes straight to exit
+; CHECK-LABEL: define void @hoist_inner_loop2(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr  ; defined in the a loop, stored in c.latch
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr  ; defined in the b loop, stored in c.latch
+  %v1 = call i1 @cond()  ; defined outside the c loop, tested in c.header
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %b.latch, label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  br i1 %v1, label %b.latch, label %c.latch  ; true leaves the c loop for b.latch
+; CHECK:       c.header:
+; CHECK-NEXT:    br label %c.latch
+
+c.latch:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr  ; the expected output reads %x.a through a new phi
+  store i32 %x.b, i32* %ptr  ; the expected output reads %x.b through a new phi
+  %v2 = call i1 @cond()
+  br i1 %v2, label %c.header, label %exit  ; c back edge, or leave every loop at once
+; CHECK:       c.latch:
+; CHECK-NEXT:    store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %c.header, label %exit
+
+b.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %b.header, label %a.latch  ; b back edge, or fall out to a.latch
+; CHECK:       b.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+  br label %a.header  ; a back edge
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Same as @hoist_inner_loop2 but with a nested loop inside the hoisted loop.
+; Unswitch will transform the loop nest from:
+;   A < B < C < D
+; into
+;   (A < B), (C < D)
+define void @hoist_inner_loop3(i32* %ptr) {  ; the c loop contains a nested self-loop on d.header
+; CHECK-LABEL: define void @hoist_inner_loop3(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr  ; defined in the a loop, stored in d.header
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr  ; defined in the b loop, stored in d.header
+  %v1 = call i1 @cond()  ; defined outside the c loop, tested in c.header
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %b.latch, label %[[B_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  br i1 %v1, label %b.latch, label %c.body  ; true leaves the c loop for b.latch
+; CHECK:       c.header:
+; CHECK-NEXT:    br label %c.body
+
+c.body:
+  %x.c = load i32, i32* %ptr  ; defined in the c loop, stored in d.header
+  br label %d.header
+; CHECK:       c.body:
+; CHECK-NEXT:    %x.c = load i32, i32* %ptr
+; CHECK-NEXT:    br label %d.header
+
+d.header:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr  ; the expected output reads %x.a through a new phi
+  store i32 %x.b, i32* %ptr  ; the expected output reads %x.b through a new phi
+  store i32 %x.c, i32* %ptr  ; the expected output still uses %x.c directly
+  %v2 = call i1 @cond()
+  br i1 %v2, label %d.header, label %c.latch  ; d back edge (self-loop), or continue to c.latch
+; CHECK:       d.header:
+; CHECK-NEXT:    store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %x.c, i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %d.header, label %c.latch
+
+c.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %c.header, label %exit  ; c back edge, or leave every loop at once
+; CHECK:       c.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %c.header, label %exit
+
+b.latch:
+  %v4 = call i1 @cond()
+  br i1 %v4, label %b.header, label %a.latch  ; b back edge, or fall out to a.latch
+; CHECK:       b.latch:
+; CHECK-NEXT:    %v4 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v4, label %b.header, label %a.latch
+
+a.latch:
+  br label %a.header  ; a back edge
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; This test is designed to exercise checking multiple remaining exits from the
+; loop being unswitched.
+; Unswitch will transform the loop nest from:
+;   A < B < C < D
+; into
+;   A < B < (C, D)
+define void @hoist_inner_loop4() {  ; the d loop has four exits: c.latch, a.latch, loopexit.d and b.latch
+; CHECK-LABEL: define void @hoist_inner_loop4(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  %v1 = call i1 @cond()  ; defined outside the d loop, tested in d.header
+  br label %d.header
+; CHECK:       c.header:
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %[[C_HEADER_SPLIT:.*]], label %c.latch
+;
+; CHECK:       [[C_HEADER_SPLIT]]:
+; CHECK-NEXT:    br label %d.header
+
+d.header:
+  br i1 %v1, label %d.exiting1, label %c.latch  ; false leaves the d loop for c.latch
+; CHECK:       d.header:
+; CHECK-NEXT:    br label %d.exiting1
+
+d.exiting1:
+  %v2 = call i1 @cond()
+  br i1 %v2, label %d.exiting2, label %a.latch  ; exit to the a loop's latch
+; CHECK:       d.exiting1:
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %d.exiting2, label %a.latch
+
+d.exiting2:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %d.exiting3, label %loopexit.d  ; exit that leads out of every loop
+; CHECK:       d.exiting2:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %d.exiting3, label %loopexit.d
+
+d.exiting3:
+  %v4 = call i1 @cond()
+  br i1 %v4, label %d.latch, label %b.latch  ; exit to the b loop's latch
+; CHECK:       d.exiting3:
+; CHECK-NEXT:    %v4 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v4, label %d.latch, label %b.latch
+
+d.latch:
+  br label %d.header  ; d back edge
+; CHECK:       d.latch:
+; CHECK-NEXT:    br label %d.header
+
+c.latch:
+  %v5 = call i1 @cond()
+  br i1 %v5, label %c.header, label %loopexit.c  ; c back edge, or exit out of every loop
+; CHECK:       c.latch:
+; CHECK-NEXT:    %v5 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v5, label %c.header, label %loopexit.c
+
+b.latch:
+  br label %b.header  ; b back edge
+; CHECK:       b.latch:
+; CHECK-NEXT:    br label %b.header
+
+a.latch:
+  br label %a.header  ; a back edge
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+loopexit.d:
+  br label %exit
+; CHECK:       loopexit.d:
+; CHECK-NEXT:    br label %exit
+
+loopexit.c:
+  br label %exit
+; CHECK:       loopexit.c:
+; CHECK-NEXT:    br label %exit
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Unswitch will transform the loop nest from:
+;   A < B < C < D
+; into
+;   A < ((B < C), D)
+define void @hoist_inner_loop5(i32* %ptr) {  ; d.latch exits directly to a.latch, bypassing c.latch and b.latch
+; CHECK-LABEL: define void @hoist_inner_loop5(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr  ; defined in the a loop, stored in d.latch
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr  ; defined in the b loop, stored in d.latch
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  %x.c = load i32, i32* %ptr  ; defined in the c loop, stored in d.latch
+  %v1 = call i1 @cond()  ; defined outside the d loop, tested in d.header
+  br label %d.header
+; CHECK:       c.header:
+; CHECK-NEXT:    %x.c = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v1, label %c.latch, label %[[C_HEADER_SPLIT:.*]]
+;
+; CHECK:       [[C_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %c.header ]
+; CHECK-NEXT:    %[[X_C_LCSSA:.*]] = phi i32 [ %x.c, %c.header ]
+; CHECK-NEXT:    br label %d.header
+
+d.header:
+  br i1 %v1, label %c.latch, label %d.latch  ; true leaves the d loop for c.latch
+; CHECK:       d.header:
+; CHECK-NEXT:    br label %d.latch
+
+d.latch:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr  ; the expected output still uses %x.a directly
+  store i32 %x.b, i32* %ptr  ; the expected output reads %x.b through a new phi
+  store i32 %x.c, i32* %ptr  ; the expected output reads %x.c through a new phi
+  %v2 = call i1 @cond()
+  br i1 %v2, label %d.header, label %a.latch  ; d back edge, or exit to the a loop's latch
+; CHECK:       d.latch:
+; CHECK-NEXT:    store i32 %x.a, i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_C_LCSSA]], i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %d.header, label %a.latch
+
+c.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %c.header, label %b.latch  ; c back edge, or fall out to b.latch
+; CHECK:       c.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %c.header, label %b.latch
+
+b.latch:
+  br label %b.header  ; b back edge
+; CHECK:       b.latch:
+; CHECK-NEXT:    br label %b.header
+
+a.latch:
+  br label %a.header  ; a back edge
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:  ; no branch in this function targets this block
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+; Same as `@hoist_inner_loop2` but using a switch.
+; Unswitch will transform the loop nest from:
+;   A < B < C
+; into
+;   (A < B), C
+define void @hoist_inner_loop_switch(i32* %ptr) {
+; CHECK-LABEL: define void @hoist_inner_loop_switch(
+entry:
+  br label %a.header
+; CHECK:       entry:
+; CHECK-NEXT:    br label %a.header
+
+a.header:
+  %x.a = load i32, i32* %ptr
+  br label %b.header
+; CHECK:       a.header:
+; CHECK-NEXT:    %x.a = load i32, i32* %ptr
+; CHECK-NEXT:    br label %b.header
+
+b.header:
+  %x.b = load i32, i32* %ptr
+  %v1 = call i32 @cond.i32()
+  br label %c.header
+; CHECK:       b.header:
+; CHECK-NEXT:    %x.b = load i32, i32* %ptr
+; CHECK-NEXT:    %v1 = call i32 @cond.i32()
+; CHECK-NEXT:    switch i32 %v1, label %[[B_HEADER_SPLIT:.*]] [
+; CHECK-NEXT:      i32 1, label %b.latch
+; CHECK-NEXT:      i32 2, label %b.latch
+; CHECK-NEXT:      i32 3, label %b.latch
+; CHECK-NEXT:    ]
+;
+; CHECK:       [[B_HEADER_SPLIT]]:
+; CHECK-NEXT:    %[[X_A_LCSSA:.*]] = phi i32 [ %x.a, %b.header ]
+; CHECK-NEXT:    %[[X_B_LCSSA:.*]] = phi i32 [ %x.b, %b.header ]
+; CHECK-NEXT:    br label %c.header
+
+c.header:
+  switch i32 %v1, label %c.latch [
+    i32 1, label %b.latch
+    i32 2, label %b.latch
+    i32 3, label %b.latch
+  ]
+; CHECK:       c.header:
+; CHECK-NEXT:    br label %c.latch
+
+c.latch:
+  ; Use values from other loops to check LCSSA form.
+  store i32 %x.a, i32* %ptr
+  store i32 %x.b, i32* %ptr
+  %v2 = call i1 @cond()
+  br i1 %v2, label %c.header, label %exit
+; CHECK:       c.latch:
+; CHECK-NEXT:    store i32 %[[X_A_LCSSA]], i32* %ptr
+; CHECK-NEXT:    store i32 %[[X_B_LCSSA]], i32* %ptr
+; CHECK-NEXT:    %v2 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v2, label %c.header, label %exit
+
+b.latch:
+  %v3 = call i1 @cond()
+  br i1 %v3, label %b.header, label %a.latch
+; CHECK:       b.latch:
+; CHECK-NEXT:    %v3 = call i1 @cond()
+; CHECK-NEXT:    br i1 %v3, label %b.header, label %a.latch
+
+a.latch:
+  br label %a.header
+; CHECK:       a.latch:
+; CHECK-NEXT:    br label %a.header
+
+exit:
+  ret void
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+}
+
+define void @test_unswitch_to_common_succ_with_phis(i32* %var, i32 %cond) {
+; CHECK-LABEL: @test_unswitch_to_common_succ_with_phis(
+entry:
+  br label %header
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond, label %loopexit1 [
+; CHECK-NEXT:      i32 13, label %loopexit2
+; CHECK-NEXT:      i32 0, label %entry.split
+; CHECK-NEXT:      i32 1, label %entry.split
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %header
+
+header:
+  %var_val = load i32, i32* %var
+  switch i32 %cond, label %loopexit1 [ ; %cond is a function argument, so this switch is loop-invariant
+    i32 0, label %latch
+    i32 1, label %latch
+    i32 13, label %loopexit2
+  ]
+; CHECK:       header:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    br label %latch
+
+latch:
+  ; No-op PHI node (one identical entry per switch edge from %header: cases 0 and 1) to exercise weird PHI update scenarios.
+  %phi = phi i32 [ %var_val, %header ], [ %var_val, %header ]
+  call void @sink(i32 %phi)
+  br label %header
+; CHECK:       latch:
+; CHECK-NEXT:    %[[PHI:.*]] = phi i32 [ %var_val, %header ]
+; CHECK-NEXT:    call void @sink(i32 %[[PHI]])
+; CHECK-NEXT:    br label %header
+
+loopexit1:
+  ret void
+; CHECK:       loopexit1:
+; CHECK-NEXT:    ret
+
+loopexit2:
+  ret void
+; CHECK:       loopexit2:
+; CHECK-NEXT:    ret
+}
+
+define void @test_unswitch_to_default_common_succ_with_phis(i32* %var, i32 %cond) {
+; CHECK-LABEL: @test_unswitch_to_default_common_succ_with_phis(
+entry:
+  br label %header
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 %cond, label %entry.split [
+; CHECK-NEXT:      i32 13, label %loopexit
+; CHECK-NEXT:    ]
+;
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label %header
+
+header:
+  %var_val = load i32, i32* %var
+  switch i32 %cond, label %latch [ ; the default destination and cases 0 and 1 all lead to %latch
+    i32 0, label %latch
+    i32 1, label %latch
+    i32 13, label %loopexit
+  ]
+; CHECK:       header:
+; CHECK-NEXT:    load
+; CHECK-NEXT:    br label %latch
+
+latch:
+  ; No-op PHI node (one identical entry per switch edge from %header: default, 0 and 1) to exercise weird PHI update scenarios.
+  %phi = phi i32 [ %var_val, %header ], [ %var_val, %header ], [ %var_val, %header ]
+  call void @sink(i32 %phi)
+  br label %header
+; CHECK:       latch:
+; CHECK-NEXT:    %[[PHI:.*]] = phi i32 [ %var_val, %header ]
+; CHECK-NEXT:    call void @sink(i32 %[[PHI]])
+; CHECK-NEXT:    br label %header
+
+loopexit:
+  ret void
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret
+}

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/update-scev.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/update-scev.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/update-scev.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/update-scev.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,187 @@
+; RUN: opt -passes='print<scalar-evolution>,loop(unswitch,loop-instsimplify),print<scalar-evolution>' -enable-nontrivial-unswitch -S < %s 2>%t.scev | FileCheck %s
+; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='print<scalar-evolution>,loop(unswitch,loop-instsimplify),print<scalar-evolution>' -enable-nontrivial-unswitch -S < %s 2>%t.scev | FileCheck %s
+; RUN: FileCheck %s --check-prefix=SCEV < %t.scev
+
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f()
+
+; Check that trivially unswitching an inner loop resets both the inner and outer
+; loop trip count.
+define void @test1(i32 %n, i32 %m, i1 %cond) {
+; Check that SCEV has no trip count before unswitching.
+; SCEV-LABEL: Determining loop execution counts for: @test1
+; SCEV: Loop %inner_loop_begin: <multiple exits> Unpredictable backedge-taken count.
+; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
+;
+; Now check that after unswitching and simplifying instructions we get clean
+; backedge-taken counts.
+; SCEV-LABEL: Determining loop execution counts for: @test1
+; SCEV: Loop %inner_loop_begin: backedge-taken count is (-1 + (1 smax %m))<nsw>
+; SCEV: Loop %outer_loop_begin: backedge-taken count is (-1 + (1 smax %n))<nsw>
+;
+; And verify the code matches what we expect.
+; CHECK-LABEL: define void @test1(
+entry:
+  br label %outer_loop_begin
+; Ensure the outer loop didn't get unswitched.
+; CHECK:       entry:
+; CHECK-NEXT:    br label %outer_loop_begin
+
+outer_loop_begin:
+  %i = phi i32 [ %i.next, %outer_loop_latch ], [ 0, %entry ]
+  ; Block unswitching of the outer loop with a noduplicate call.
+  call void @f() noduplicate
+  br label %inner_loop_begin
+; Ensure the inner loop got unswitched into the outer loop.
+; CHECK:       outer_loop_begin:
+; CHECK-NEXT:    %{{.*}} = phi i32
+; CHECK-NEXT:    call void @f()
+; CHECK-NEXT:    br i1 %cond,
+
+inner_loop_begin:
+  %j = phi i32 [ %j.next, %inner_loop_latch ], [ 0, %outer_loop_begin ]
+  br i1 %cond, label %inner_loop_latch, label %inner_loop_early_exit
+
+inner_loop_latch:
+  %j.next = add nsw i32 %j, 1
+  %j.cmp = icmp slt i32 %j.next, %m
+  br i1 %j.cmp, label %inner_loop_begin, label %inner_loop_late_exit
+
+inner_loop_early_exit:
+  %j.lcssa = phi i32 [ %i, %inner_loop_begin ]
+  br label %outer_loop_latch
+
+inner_loop_late_exit:
+  br label %outer_loop_latch
+
+outer_loop_latch:
+  %i.phi = phi i32 [ %j.lcssa, %inner_loop_early_exit ], [ %i, %inner_loop_late_exit ]
+  %i.next = add nsw i32 %i.phi, 1
+  %i.cmp = icmp slt i32 %i.next, %n
+  br i1 %i.cmp, label %outer_loop_begin, label %exit
+
+exit:
+  ret void
+}
+
+; Check that trivially unswitching a switch in an inner loop resets both the
+; inner and outer loop trip counts.
+define void @test2(i32 %n, i32 %m, i32 %cond) {
+; Check that SCEV has no trip count before unswitching.
+; SCEV-LABEL: Determining loop execution counts for: @test2
+; SCEV: Loop %inner_loop_begin: <multiple exits> Unpredictable backedge-taken count.
+; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
+;
+; Now check that after unswitching and simplifying instructions we get clean
+; backedge-taken counts.
+; SCEV-LABEL: Determining loop execution counts for: @test2
+; SCEV: Loop %inner_loop_begin: backedge-taken count is (-1 + (1 smax %m))<nsw>
+; SCEV: Loop %outer_loop_begin: backedge-taken count is (-1 + (1 smax %n))<nsw>
+;
+; CHECK-LABEL: define void @test2(
+entry:
+  br label %outer_loop_begin
+; Ensure the outer loop didn't get unswitched.
+; CHECK:       entry:
+; CHECK-NEXT:    br label %outer_loop_begin
+
+outer_loop_begin:
+  %i = phi i32 [ %i.next, %outer_loop_latch ], [ 0, %entry ]
+  ; Block unswitching of the outer loop with a noduplicate call.
+  call void @f() noduplicate
+  br label %inner_loop_begin
+; Ensure the inner loop got unswitched into the outer loop.
+; CHECK:       outer_loop_begin:
+; CHECK-NEXT:    %{{.*}} = phi i32
+; CHECK-NEXT:    call void @f()
+; CHECK-NEXT:    switch i32 %cond,
+
+inner_loop_begin:
+  %j = phi i32 [ %j.next, %inner_loop_latch ], [ 0, %outer_loop_begin ]
+  switch i32 %cond, label %inner_loop_early_exit [
+    i32 1, label %inner_loop_latch
+    i32 2, label %inner_loop_latch
+  ]
+
+inner_loop_latch:
+  %j.next = add nsw i32 %j, 1
+  %j.cmp = icmp slt i32 %j.next, %m
+  br i1 %j.cmp, label %inner_loop_begin, label %inner_loop_late_exit
+
+inner_loop_early_exit:
+  %j.lcssa = phi i32 [ %i, %inner_loop_begin ]
+  br label %outer_loop_latch
+
+inner_loop_late_exit:
+  br label %outer_loop_latch
+
+outer_loop_latch:
+  %i.phi = phi i32 [ %j.lcssa, %inner_loop_early_exit ], [ %i, %inner_loop_late_exit ]
+  %i.next = add nsw i32 %i.phi, 1
+  %i.cmp = icmp slt i32 %i.next, %n
+  br i1 %i.cmp, label %outer_loop_begin, label %exit
+
+exit:
+  ret void
+}
+
+; Check that non-trivial unswitching of a branch in an inner loop into the outer
+; loop invalidates the SCEV trip counts of both the inner and outer loops.
+define void @test3(i32 %n, i32 %m, i1 %cond) {
+; Check that SCEV has no trip count before unswitching.
+; SCEV-LABEL: Determining loop execution counts for: @test3
+; SCEV: Loop %inner_loop_begin: <multiple exits> Unpredictable backedge-taken count.
+; SCEV: Loop %outer_loop_begin: Unpredictable backedge-taken count.
+;
+; Now check that after unswitching and simplifying instructions we get clean
+; backedge-taken counts.
+; SCEV-LABEL: Determining loop execution counts for: @test3
+; SCEV: Loop %inner_loop_begin{{.*}}: backedge-taken count is (-1 + (1 smax %m))<nsw>
+; SCEV: Loop %outer_loop_begin: backedge-taken count is (-1 + (1 smax %n))<nsw>
+;
+; And verify the code matches what we expect.
+; CHECK-LABEL: define void @test3(
+entry:
+  br label %outer_loop_begin
+; Ensure the outer loop didn't get unswitched.
+; CHECK:       entry:
+; CHECK-NEXT:    br label %outer_loop_begin
+
+outer_loop_begin:
+  %i = phi i32 [ %i.next, %outer_loop_latch ], [ 0, %entry ]
+  ; Block unswitching of the outer loop with a noduplicate call.
+  call void @f() noduplicate
+  br label %inner_loop_begin
+; Ensure the inner loop got unswitched into the outer loop.
+; CHECK:       outer_loop_begin:
+; CHECK-NEXT:    %{{.*}} = phi i32
+; CHECK-NEXT:    call void @f()
+; CHECK-NEXT:    br i1 %cond,
+
+inner_loop_begin:
+  %j = phi i32 [ %j.next, %inner_loop_latch ], [ 0, %outer_loop_begin ]
+  %j.tmp = add nsw i32 %j, 1
+  br i1 %cond, label %inner_loop_latch, label %inner_loop_early_exit
+
+inner_loop_latch:
+  %j.next = add nsw i32 %j, 1
+  %j.cmp = icmp slt i32 %j.next, %m
+  br i1 %j.cmp, label %inner_loop_begin, label %inner_loop_late_exit
+
+inner_loop_early_exit:
+  %j.lcssa = phi i32 [ %j.tmp, %inner_loop_begin ]
+  br label %outer_loop_latch
+
+inner_loop_late_exit:
+  br label %outer_loop_latch
+
+outer_loop_latch:
+  %inc.phi = phi i32 [ %j.lcssa, %inner_loop_early_exit ], [ 1, %inner_loop_late_exit ]
+  %i.next = add nsw i32 %i, %inc.phi
+  %i.cmp = icmp slt i32 %i.next, %n
+  br i1 %i.cmp, label %outer_loop_begin, label %exit
+
+exit:
+  ret void
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,19 @@
+; CFG Simplification is making a loop dead, then changing the add into:
+;
+;   %V1 = add int %V1, 1
+;
+; Which is not valid SSA
+;
+; RUN: opt < %s -simplifycfg | llvm-dis
+
+define void @test() {
+; <label>:0
+	br i1 true, label %end, label %Loop		; constant-true condition: the %0 -> %Loop edge is never taken
+Loop:		; preds = %Loop, %0
+	%V = phi i32 [ 0, %0 ], [ %V1, %Loop ]		; <i32> [#uses=1]
+	%V1 = add i32 %V, 1		; <i32> [#uses=1]
+	br label %Loop
+end:		; preds = %0
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,13 @@
+; RUN: opt < %s -simplifycfg
+
+define i32 @test(i32 %A, i32 %B, i1 %cond) {
+J:
+	%C = add i32 %A, 12		; <i32> [#uses=3]
+	br i1 %cond, label %L, label %L		; both successors are %L, so %J is a predecessor of %L twice
+L:		; preds = %J, %J
+	%Q = phi i32 [ %C, %J ], [ %C, %J ]		; <i32> [#uses=1]
+	%D = add i32 %C, %B		; <i32> [#uses=1]
+	%E = add i32 %Q, %D		; <i32> [#uses=1]
+	ret i32 %E
+}
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,17 @@
+; RUN: opt < %s -simplifycfg -disable-output
+
+define void @test(i32* %ldo, i1 %c, i1 %d) {
+bb9:
+	br i1 %c, label %bb11, label %bb10
+bb10:		; preds = %bb9
+	br label %bb11		; %bb10 contains nothing but this branch
+bb11:		; preds = %bb10, %bb9
+	%reg330 = phi i32* [ null, %bb10 ], [ %ldo, %bb9 ]		; <i32*> [#uses=1]
+	br label %bb20
+bb20:		; preds = %bb20, %bb11
+	store i32* %reg330, i32** null		; %reg330 is defined in %bb11 and used here, inside the self-looping %bb20
+	br i1 %d, label %bb20, label %done
+done:		; preds = %bb20
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; Do not remove the invoke!
+;
+; RUN: opt < %s -simplifycfg -disable-output
+
+define i32 @test() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+	%A = invoke i32 @test( )
+			to label %Ret unwind label %Ret2		; <i32> [#uses=1]
+Ret:		; preds = %0
+	ret i32 %A		; returns the invoke's result, so the invoke still has a use
+Ret2:		; preds = %0
+        %val = landingpad { i8*, i32 }
+                 catch i8* null
+	ret i32 undef
+}
+
+declare i32 @__gxx_personality_v0(...)

Added: llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; This test checks to make sure that 'br X, Dest, Dest' is folded into 
+; 'br Dest'
+
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare void @noop()
+
+; CHECK-NOT: br i1 %c2
+define i32 @test(i1 %c1, i1 %c2) {
+	call void @noop( )
+	br i1 %c1, label %A, label %Y
+A:		; preds = %0
+	call void @noop( )
+	br i1 %c2, label %X, label %X		; both targets are %X, so this is expected to fold to an unconditional branch
+X:		; preds = %Y, %A, %A
+	call void @noop( )
+	ret i32 0
+Y:		; preds = %0
+	call void @noop( )
+	br label %X
+}
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; This test checks to make sure that 'br X, Dest, Dest' is folded into 
+; 'br Dest'.  This can only happen after the 'Z' block is eliminated.  This is
+; due to the fact that the SimplifyCFG function does not use 
+; the ConstantFoldTerminator function.
+
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: br i1 %c2
+
+declare void @noop()
+
+define i32 @test(i1 %c1, i1 %c2) {
+	call void @noop( )
+	br i1 %c1, label %A, label %Y
+A:		; preds = %0
+	call void @noop( )
+	br i1 %c2, label %Z, label %X		; targets differ only until the forwarding block %Z is eliminated
+Z:		; preds = %A
+	br label %X		; %Z does nothing but forward to %X
+X:		; preds = %Y, %Z, %A
+	call void @noop( )
+	ret i32 0
+Y:		; preds = %0
+	call void @noop( )
+	br label %X
+}
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,57 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: switch
+        %llvm.dbg.anchor.type = type { i32, i32 }
+        %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
+
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+
+@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
+@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata"	; <[6 x i8]*> [#uses=1]
+@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata"		; <[55 x i8]*> [#uses=1]
+@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8], [55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
+
+declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
+
+; Test folding all to same dest
+define i32 @test3(i1 %C) {
+        br i1 %C, label %Start, label %TheDest
+Start:          ; preds = %0
+call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+        switch i32 3, label %TheDest [
+                 i32 0, label %TheDest
+                 i32 1, label %TheDest
+                 i32 2, label %TheDest
+                 i32 5, label %TheDest
+        ]
+TheDest:                ; preds = %Start, %Start, %Start, %Start, %Start, %0
+        ret i32 1234
+}
+
+; Test folding switch -> branch
+define i32 @test4(i32 %C) {
+        switch i32 %C, label %L1 [
+                 i32 0, label %L2
+        ]
+L1:             ; preds = %0
+call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+        ret i32 0
+L2:             ; preds = %0
+call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+        ret i32 1
+}
+
+; Can fold into a cond branch!
+define i32 @test5(i32 %C) {
+        switch i32 %C, label %L1 [
+                 i32 0, label %L2
+                 i32 123, label %L1
+        ]
+L1:             ; preds = %0, %0
+call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+        ret i32 0
+L2:             ; preds = %0
+call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
+        ret i32 1
+}
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; Test normal folding
+define i32 @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  TheDest:
+; CHECK-NEXT:    ret i32 1234
+;
+  switch i32 5, label %Default [
+  i32 0, label %Foo
+  i32 1, label %Bar
+  i32 2, label %Baz
+  i32 5, label %TheDest ; the only case matching the constant operand 5
+  ]
+Default:
+  ret i32 -1
+Foo:
+  ret i32 -2
+Bar:
+  ret i32 -3
+Baz:
+  ret i32 -4
+TheDest:
+  ret i32 1234
+}
+
+; Test folding to default dest
+define i32 @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  Default:
+; CHECK-NEXT:    ret i32 1234
+;
+  switch i32 3, label %Default [ ; no case value equals the constant operand 3, so %Default is taken
+  i32 0, label %Foo
+  i32 1, label %Bar
+  i32 2, label %Baz
+  i32 5, label %TheDest
+  ]
+Default:
+  ret i32 1234
+Foo:
+  ret i32 -2
+Bar:
+  ret i32 -5
+Baz:
+  ret i32 -6
+TheDest:
+  ret i32 -8
+}
+
+; Test folding all to same dest
+define i32 @test3(i1 %C) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  TheDest:
+; CHECK-NEXT:    ret i32 1234
+;
+  br i1 %C, label %Start, label %TheDest
+Start:          ; preds = %0
+  switch i32 3, label %TheDest [ ; the default and every case lead to %TheDest
+  i32 0, label %TheDest
+  i32 1, label %TheDest
+  i32 2, label %TheDest
+  i32 5, label %TheDest
+  ]
+TheDest:
+  ret i32 1234
+}
+
+; Test folding switch -> branch
+define i32 @test4(i32 %C) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  L1:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 %C, 0
+; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[COND]], i32 1, i32 0
+; CHECK-NEXT:    ret i32 [[DOT]]
+;
+  switch i32 %C, label %L1 [
+  i32 0, label %L2 ; single case: %L2 when %C is 0, otherwise the default %L1
+  ]
+L1:
+  ret i32 0
+L2:
+  ret i32 1
+}
+
+; Can fold into a cond branch!
+define i32 @test5(i32 %C) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  L1:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 %C, 0
+; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[COND]], i32 1, i32 0
+; CHECK-NEXT:    ret i32 [[DOT]]
+;
+  switch i32 %C, label %L1 [
+  i32 0, label %L2
+  i32 123, label %L1 ; same destination as the default
+  ]
+L1:
+  ret i32 0
+L2:
+  ret i32 1
+}
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt < %s -simplifycfg -disable-output
+
+define void @symhash_add() {
+entry:
+	br i1 undef, label %then.0, label %UnifiedReturnBlock
+then.0:		; preds = %entry
+	br i1 undef, label %loopentry.2, label %loopentry.1.preheader
+loopentry.1.preheader:		; preds = %then.0
+	br label %loopentry.1.outer
+loopentry.1.outer:		; preds = %loopexit.1, %loopentry.1.preheader
+	br label %loopentry.1
+loopentry.1:		; preds = %endif.1, %then.4, %then.3, %then.1, %loopentry.1.outer
+	br i1 undef, label %loopexit.1, label %no_exit.1
+no_exit.1:		; preds = %loopentry.1
+	br i1 undef, label %then.1, label %else.0
+then.1:		; preds = %no_exit.1
+	br label %loopentry.1
+else.0:		; preds = %no_exit.1
+	br i1 undef, label %then.2, label %else.1
+then.2:		; preds = %else.0
+	br i1 undef, label %then.3, label %endif.1
+then.3:		; preds = %then.2
+	br label %loopentry.1
+else.1:		; preds = %else.0
+	br i1 undef, label %endif.1, label %then.4
+then.4:		; preds = %else.1
+	br label %loopentry.1
+endif.1:		; preds = %else.1, %then.2
+	br label %loopentry.1
+loopexit.1:		; preds = %loopentry.1
+	br i1 undef, label %loopentry.1.outer, label %loopentry.2
+loopentry.2:		; preds = %no_exit.2, %loopexit.1, %then.0
+	br i1 undef, label %loopexit.2, label %no_exit.2
+no_exit.2:		; preds = %loopentry.2
+	br label %loopentry.2
+loopexit.2:		; preds = %loopentry.2
+	ret void
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,95 @@
+; RUN: opt < %s -simplifycfg -disable-output
+; PR584
+@g_38098584 = external global i32		; <i32*> [#uses=1]
+@g_60187400 = external global i32		; <i32*> [#uses=1]
+@g_59182229 = external global i32		; <i32*> [#uses=2]
+
+define i32 @_Z13func_26556482h(i8 %l_88173906) {
+entry:
+	%tmp.1 = bitcast i8 %l_88173906 to i8		; <i8> [#uses=2]
+	%tmp.3 = icmp eq i8 %l_88173906, 0		; <i1> [#uses=1]
+	br i1 %tmp.3, label %else.0, label %then.0
+then.0:		; preds = %entry
+	%tmp.5 = icmp eq i8 %l_88173906, 0		; <i1> [#uses=1]
+	br i1 %tmp.5, label %else.1, label %then.1
+then.1:		; preds = %then.0
+	br label %return
+else.1:		; preds = %then.0
+	br label %loopentry.0
+loopentry.0:		; preds = %no_exit.0, %else.1
+	%i.0.1 = phi i32 [ 0, %else.1 ], [ %inc.0, %no_exit.0 ]		; <i32> [#uses=2]
+	%tmp.9 = icmp sgt i32 %i.0.1, 99		; <i1> [#uses=1]
+	br i1 %tmp.9, label %endif.0, label %no_exit.0
+no_exit.0:		; preds = %loopentry.0
+	%inc.0 = add i32 %i.0.1, 1		; <i32> [#uses=1]
+	br label %loopentry.0
+else.0:		; preds = %entry
+	%tmp.12 = sext i8 %tmp.1 to i32		; <i32> [#uses=1]
+	br label %return
+endif.0:		; preds = %loopentry.0
+	%tmp.14 = sext i8 %tmp.1 to i32		; <i32> [#uses=1]
+	%tmp.16 = zext i8 %l_88173906 to i32		; <i32> [#uses=1]
+	%tmp.17 = icmp sgt i32 %tmp.14, %tmp.16		; <i1> [#uses=1]
+	%tmp.19 = load i32, i32* @g_59182229		; <i32> [#uses=2]
+	br i1 %tmp.17, label %cond_true, label %cond_false
+cond_true:		; preds = %endif.0
+	%tmp.20 = icmp ne i32 %tmp.19, 1		; <i1> [#uses=1]
+	br label %cond_continue
+cond_false:		; preds = %endif.0
+	%tmp.22 = icmp ne i32 %tmp.19, 0		; <i1> [#uses=1]
+	br label %cond_continue
+cond_continue:		; preds = %cond_false, %cond_true
+	%mem_tmp.0 = phi i1 [ %tmp.20, %cond_true ], [ %tmp.22, %cond_false ]		; <i1> [#uses=1]
+	br i1 %mem_tmp.0, label %then.2, label %else.2
+then.2:		; preds = %cond_continue
+	%tmp.25 = zext i8 %l_88173906 to i32		; <i32> [#uses=1]
+	br label %return
+else.2:		; preds = %cond_continue
+	br label %loopentry.1
+loopentry.1:		; preds = %endif.3, %else.2
+	%i.1.1 = phi i32 [ 0, %else.2 ], [ %inc.3, %endif.3 ]		; <i32> [#uses=2]
+	%i.3.2 = phi i32 [ undef, %else.2 ], [ %i.3.0, %endif.3 ]		; <i32> [#uses=2]
+	%l_88173906_addr.1 = phi i8 [ %l_88173906, %else.2 ], [ %l_88173906_addr.0, %endif.3 ]		; <i8> [#uses=3]
+	%tmp.29 = icmp sgt i32 %i.1.1, 99		; <i1> [#uses=1]
+	br i1 %tmp.29, label %endif.2, label %no_exit.1
+no_exit.1:		; preds = %loopentry.1
+	%tmp.30 = load i32, i32* @g_38098584		; <i32> [#uses=1]
+	%tmp.31 = icmp eq i32 %tmp.30, 0		; <i1> [#uses=1]
+	br i1 %tmp.31, label %else.3, label %then.3
+then.3:		; preds = %no_exit.1
+	br label %endif.3
+else.3:		; preds = %no_exit.1
+	br i1 false, label %else.4, label %then.4
+then.4:		; preds = %else.3
+	br label %endif.3
+else.4:		; preds = %else.3
+	br i1 false, label %else.5, label %then.5
+then.5:		; preds = %else.4
+	store i32 -1004318825, i32* @g_59182229
+	br label %return
+else.5:		; preds = %else.4
+	br label %loopentry.3
+loopentry.3:		; preds = %then.7, %else.5
+	%i.3.3 = phi i32 [ 0, %else.5 ], [ %inc.2, %then.7 ]		; <i32> [#uses=3]
+	%tmp.55 = icmp sgt i32 %i.3.3, 99		; <i1> [#uses=1]
+	br i1 %tmp.55, label %endif.3, label %no_exit.3
+no_exit.3:		; preds = %loopentry.3
+	%tmp.57 = icmp eq i8 %l_88173906_addr.1, 0		; <i1> [#uses=1]
+	br i1 %tmp.57, label %else.7, label %then.7
+then.7:		; preds = %no_exit.3
+	store i32 16239, i32* @g_60187400
+	%inc.2 = add i32 %i.3.3, 1		; <i32> [#uses=1]
+	br label %loopentry.3
+else.7:		; preds = %no_exit.3
+	br label %return
+endif.3:		; preds = %loopentry.3, %then.4, %then.3
+	%i.3.0 = phi i32 [ %i.3.2, %then.3 ], [ %i.3.2, %then.4 ], [ %i.3.3, %loopentry.3 ]		; <i32> [#uses=1]
+	%l_88173906_addr.0 = phi i8 [ 100, %then.3 ], [ %l_88173906_addr.1, %then.4 ], [ %l_88173906_addr.1, %loopentry.3 ]		; <i8> [#uses=1]
+	%inc.3 = add i32 %i.1.1, 1		; <i32> [#uses=1]
+	br label %loopentry.1
+endif.2:		; preds = %loopentry.1
+	br label %return
+return:		; preds = %endif.2, %else.7, %then.5, %then.2, %else.0, %then.1
+	%result.0 = phi i32 [ 1624650671, %then.1 ], [ %tmp.25, %then.2 ], [ 3379, %then.5 ], [ 52410, %else.7 ], [ -1526438411, %endif.2 ], [ %tmp.12, %else.0 ]		; <i32> [#uses=1]
+	ret i32 %result.0
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,71 @@
+; RUN: opt < %s -simplifycfg -disable-output
+; END.
+
+define void @main() {		; only function in this test; fed to -simplifycfg with output discarded
+entry:
+	%tmp.14.i19 = icmp eq i32 0, 2		; <i1> [#uses=1]; both operands are constants
+	br i1 %tmp.14.i19, label %endif.1.i20, label %read_min.exit
+endif.1.i20:		; preds = %entry
+	%tmp.9.i.i = icmp eq i8* null, null		; <i1> [#uses=1]
+	br i1 %tmp.9.i.i, label %then.i12.i, label %then.i.i
+then.i.i:		; preds = %endif.1.i20
+	ret void
+then.i12.i:		; preds = %endif.1.i20
+	%tmp.9.i4.i = icmp eq i8* null, null		; <i1> [#uses=1]
+	br i1 %tmp.9.i4.i, label %endif.2.i33, label %then.i5.i
+then.i5.i:		; preds = %then.i12.i
+	ret void
+endif.2.i33:		; preds = %then.i12.i
+	br i1 false, label %loopexit.0.i40, label %no_exit.0.i35
+no_exit.0.i35:		; preds = %no_exit.0.i35, %endif.2.i33; block is its own predecessor (self-loop)
+	%tmp.130.i = icmp slt i32 0, 0		; <i1> [#uses=1]
+	br i1 %tmp.130.i, label %loopexit.0.i40.loopexit, label %no_exit.0.i35
+loopexit.0.i40.loopexit:		; preds = %no_exit.0.i35
+	br label %loopexit.0.i40
+loopexit.0.i40:		; preds = %loopexit.0.i40.loopexit, %endif.2.i33
+	%tmp.341.i = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br i1 %tmp.341.i, label %loopentry.1.i, label %read_min.exit
+loopentry.1.i:		; preds = %loopexit.0.i40
+	%tmp.347.i = icmp sgt i32 0, 0		; <i1> [#uses=1]
+	br i1 %tmp.347.i, label %no_exit.1.i41, label %loopexit.2.i44
+no_exit.1.i41:		; preds = %endif.5.i, %loopentry.1.i
+	%indvar.i42 = phi i32 [ %indvar.next.i, %endif.5.i ], [ 0, %loopentry.1.i ]		; <i32> [#uses=1]; the only phi in this function
+	%tmp.355.i = icmp eq i32 0, 3		; <i1> [#uses=1]
+	br i1 %tmp.355.i, label %endif.5.i, label %read_min.exit
+endif.5.i:		; preds = %no_exit.1.i41
+	%tmp.34773.i = icmp sgt i32 0, 0		; <i1> [#uses=1]
+	%indvar.next.i = add i32 %indvar.i42, 1		; <i32> [#uses=1]; fed back into %indvar.i42 via the branch to %no_exit.1.i41
+	br i1 %tmp.34773.i, label %no_exit.1.i41, label %loopexit.1.i.loopexit
+loopexit.1.i.loopexit:		; preds = %endif.5.i
+	ret void
+loopexit.2.i44:		; preds = %loopentry.1.i
+	ret void
+read_min.exit:		; preds = %no_exit.1.i41, %loopexit.0.i40, %entry
+	%tmp.23 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br i1 %tmp.23, label %endif.1, label %then.1
+then.1:		; preds = %read_min.exit
+	br i1 false, label %endif.0.i, label %then.0.i
+then.0.i:		; preds = %then.1
+	br i1 false, label %endif.1.i, label %then.1.i
+endif.0.i:		; preds = %then.1; terminator is identical to the one in %then.0.i
+	br i1 false, label %endif.1.i, label %then.1.i
+then.1.i:		; preds = %endif.0.i, %then.0.i
+	br i1 false, label %getfree.exit, label %then.2.i
+endif.1.i:		; preds = %endif.0.i, %then.0.i; terminator is identical to the one in %then.1.i
+	br i1 false, label %getfree.exit, label %then.2.i
+then.2.i:		; preds = %endif.1.i, %then.1.i
+	ret void
+getfree.exit:		; preds = %endif.1.i, %then.1.i
+	ret void
+endif.1:		; preds = %read_min.exit
+	%tmp.27.i = getelementptr i32, i32* null, i32 0		; <i32*> [#uses=0]
+	br i1 false, label %loopexit.0.i15, label %no_exit.0.i14
+no_exit.0.i14:		; preds = %endif.1
+	ret void
+loopexit.0.i15:		; preds = %endif.1
+	br i1 false, label %primal_start_artificial.exit, label %no_exit.1.i16
+no_exit.1.i16:		; preds = %no_exit.1.i16, %loopexit.0.i15; block is its own predecessor (self-loop)
+	br i1 false, label %primal_start_artificial.exit, label %no_exit.1.i16
+primal_start_artificial.exit:		; preds = %no_exit.1.i16, %loopexit.0.i15
+	ret void
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt < %s -simplifycfg -disable-output
+
+define i1 @foo() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {		; invokes itself; fed to -simplifycfg with output discarded
+	%X = invoke i1 @foo( )
+			to label %N unwind label %F		; <i1> [#uses=1]
+F:		; preds = %0; unwind destination of the invoke
+        %val = landingpad { i8*, i32 }
+                 catch i8* null
+	ret i1 false
+N:		; preds = %0; normal destination of the invoke
+	br i1 %X, label %A, label %B		; both successors return the same constant (true)
+A:		; preds = %N
+	ret i1 true
+B:		; preds = %N
+	ret i1 true
+}
+
+declare i32 @__gxx_personality_v0(...)

Added: llvm/trunk/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,123 @@
+; Make sure this doesn't turn into an infinite loop
+
+; RUN: opt < %s -simplifycfg -constprop -simplifycfg | llvm-dis | FileCheck %s
+
+%struct.anon = type { i32, i32, i32, i32, [1024 x i8] }
+@_zero_ = external global %struct.anon*		; <%struct.anon**> [#uses=2]
+@_one_ = external global %struct.anon*		; <%struct.anon**> [#uses=4]
+@str = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=0]
+
+declare i32 @bc_compare(%struct.anon*, %struct.anon*)
+
+declare void @free_num(%struct.anon**)
+
+declare %struct.anon* @copy_num(%struct.anon*)
+
+declare void @init_num(%struct.anon**)
+
+declare %struct.anon* @new_num(i32, i32)
+
+declare void @int2num(%struct.anon**, i32)
+
+declare void @bc_multiply(%struct.anon*, %struct.anon*, %struct.anon**, i32)
+
+declare void @bc_raise(%struct.anon*, %struct.anon*, %struct.anon**, i32)
+
+declare i32 @bc_divide(%struct.anon*, %struct.anon*, %struct.anon**, i32)
+
+declare void @bc_add(%struct.anon*, %struct.anon*, %struct.anon**)
+
+declare i32 @_do_compare(%struct.anon*, %struct.anon*, i32, i32)
+
+declare i32 @printf(i8*, ...)
+
+define i32 @bc_sqrt(%struct.anon** %num, i32 %scale) {		; returns 0 only from %cond_true; every other ret yields 1
+entry:
+	%guess = alloca %struct.anon*		; <%struct.anon**> [#uses=7]
+	%guess1 = alloca %struct.anon*		; <%struct.anon**> [#uses=7]
+	%point5 = alloca %struct.anon*		; <%struct.anon**> [#uses=3]
+	%tmp = load %struct.anon*, %struct.anon** %num		; <%struct.anon*> [#uses=1]
+	%tmp1 = load %struct.anon*, %struct.anon** @_zero_		; <%struct.anon*> [#uses=1]
+	%tmp.upgrd.1 = call i32 @bc_compare( %struct.anon* %tmp, %struct.anon* %tmp1 )		; <i32> [#uses=2]; compares the value loaded from %num with the one loaded from @_zero_
+	%tmp.upgrd.2 = icmp slt i32 %tmp.upgrd.1, 0		; <i1> [#uses=1]
+	br i1 %tmp.upgrd.2, label %cond_true, label %cond_false
+cond_true:		; preds = %entry; reached when the compare result is negative
+	ret i32 0
+cond_false:		; preds = %entry
+	%tmp5 = icmp eq i32 %tmp.upgrd.1, 0		; <i1> [#uses=1]
+	br i1 %tmp5, label %cond_true6, label %cond_next13
+cond_true6:		; preds = %cond_false; reached when the compare result is zero
+	call void @free_num( %struct.anon** %num )
+	%tmp8 = load %struct.anon*, %struct.anon** @_zero_		; <%struct.anon*> [#uses=1]
+	%tmp9 = call %struct.anon* @copy_num( %struct.anon* %tmp8 )		; <%struct.anon*> [#uses=1]
+	store %struct.anon* %tmp9, %struct.anon** %num
+	ret i32 1
+cond_next13:		; preds = %cond_false
+	%tmp15 = load %struct.anon*, %struct.anon** %num		; <%struct.anon*> [#uses=1]
+	%tmp16 = load %struct.anon*, %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
+	%tmp17 = call i32 @bc_compare( %struct.anon* %tmp15, %struct.anon* %tmp16 )		; <i32> [#uses=2]; same comparison as above, now against @_one_
+	%tmp19 = icmp eq i32 %tmp17, 0		; <i1> [#uses=1]
+	br i1 %tmp19, label %cond_true20, label %cond_next27
+cond_true20:		; preds = %cond_next13
+	call void @free_num( %struct.anon** %num )
+	%tmp22 = load %struct.anon*, %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
+	%tmp23 = call %struct.anon* @copy_num( %struct.anon* %tmp22 )		; <%struct.anon*> [#uses=1]
+	store %struct.anon* %tmp23, %struct.anon** %num
+	ret i32 1
+cond_next27:		; preds = %cond_next13
+	%tmp29 = load %struct.anon*, %struct.anon** %num		; <%struct.anon*> [#uses=1]
+	%tmp30 = getelementptr %struct.anon, %struct.anon* %tmp29, i32 0, i32 2		; <i32*> [#uses=1]
+	%tmp31 = load i32, i32* %tmp30		; <i32> [#uses=2]
+	%tmp33 = icmp sge i32 %tmp31, %scale		; <i1> [#uses=1]
+	%max = select i1 %tmp33, i32 %tmp31, i32 %scale		; <i32> [#uses=4]; signed maximum of %tmp31 and %scale
+	%tmp35 = add i32 %max, 2		; <i32> [#uses=0]
+	call void @init_num( %struct.anon** %guess )
+	call void @init_num( %struct.anon** %guess1 )
+	%tmp36 = call %struct.anon* @new_num( i32 1, i32 1 )		; <%struct.anon*> [#uses=2]
+	store %struct.anon* %tmp36, %struct.anon** %point5
+	%tmp.upgrd.3 = getelementptr %struct.anon, %struct.anon* %tmp36, i32 0, i32 4, i32 1		; <i8*> [#uses=1]; element 1 of the [1024 x i8] member
+	store i8 5, i8* %tmp.upgrd.3
+	%tmp39 = icmp slt i32 %tmp17, 0		; <i1> [#uses=1]
+	br i1 %tmp39, label %cond_true40, label %cond_false43
+cond_true40:		; preds = %cond_next27
+	%tmp41 = load %struct.anon*, %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
+	%tmp42 = call %struct.anon* @copy_num( %struct.anon* %tmp41 )		; <%struct.anon*> [#uses=1]
+	store %struct.anon* %tmp42, %struct.anon** %guess
+	br label %bb80.outer
+cond_false43:		; preds = %cond_next27
+	call void @int2num( %struct.anon** %guess, i32 10 )
+	%tmp45 = load %struct.anon*, %struct.anon** %num		; <%struct.anon*> [#uses=1]
+	%tmp46 = getelementptr %struct.anon, %struct.anon* %tmp45, i32 0, i32 1		; <i32*> [#uses=1]
+	%tmp47 = load i32, i32* %tmp46		; <i32> [#uses=1]
+	call void @int2num( %struct.anon** %guess1, i32 %tmp47 )
+	%tmp48 = load %struct.anon*, %struct.anon** %guess1		; <%struct.anon*> [#uses=1]
+	%tmp49 = load %struct.anon*, %struct.anon** %point5		; <%struct.anon*> [#uses=1]
+	call void @bc_multiply( %struct.anon* %tmp48, %struct.anon* %tmp49, %struct.anon** %guess1, i32 %max )
+	%tmp51 = load %struct.anon*, %struct.anon** %guess1		; <%struct.anon*> [#uses=1]
+	%tmp52 = getelementptr %struct.anon, %struct.anon* %tmp51, i32 0, i32 2		; <i32*> [#uses=1]
+	store i32 0, i32* %tmp52
+	%tmp53 = load %struct.anon*, %struct.anon** %guess		; <%struct.anon*> [#uses=1]
+	%tmp54 = load %struct.anon*, %struct.anon** %guess1		; <%struct.anon*> [#uses=1]
+	call void @bc_raise( %struct.anon* %tmp53, %struct.anon* %tmp54, %struct.anon** %guess, i32 %max )
+	br label %bb80.outer
+bb80.outer:		; preds = %cond_true83, %cond_false43, %cond_true40
+	%done.1.ph = phi i32 [ 1, %cond_true83 ], [ 0, %cond_true40 ], [ 0, %cond_false43 ]		; <i32> [#uses=1]; 1 when re-entered from %cond_true83, 0 on both initial entries
+	br label %bb80
+bb80:		; preds = %cond_true83, %bb80.outer
+	%tmp82 = icmp eq i32 %done.1.ph, 0		; <i1> [#uses=1]; %done.1.ph is defined in %bb80.outer, so it is unchanged on the %cond_true83 -> %bb80 edge
+	br i1 %tmp82, label %cond_true83, label %bb86
+cond_true83:		; preds = %bb80
+	%tmp71 = call i32 @_do_compare( %struct.anon* null, %struct.anon* null, i32 0, i32 1 )		; <i32> [#uses=1]
+	%tmp76 = icmp eq i32 %tmp71, 0		; <i1> [#uses=1]
+	br i1 %tmp76, label %bb80.outer, label %bb80
+; CHECK: bb86
+bb86:		; preds = %bb80
+	call void @free_num( %struct.anon** %num )
+	%tmp88 = load %struct.anon*, %struct.anon** %guess		; <%struct.anon*> [#uses=1]
+	%tmp89 = load %struct.anon*, %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
+	%tmp92 = call i32 @bc_divide( %struct.anon* %tmp88, %struct.anon* %tmp89, %struct.anon** %num, i32 %max )		; <i32> [#uses=0]
+	call void @free_num( %struct.anon** %guess )
+	call void @free_num( %struct.anon** %guess1 )
+	call void @free_num( %struct.anon** %point5 )
+	ret i32 1
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; RUN: opt < %s -simplifycfg -disable-output
+
+define void @polnel_() {		; only function in this test; fed to -simplifycfg with output discarded
+entry:
+	%tmp595 = icmp slt i32 0, 0		; <i1> [#uses=4]; both operands are constants; reused by the branches in %entry, %bb36, %bb46 and %bb102
+	br i1 %tmp595, label %bb148.critedge, label %cond_true40
+bb36:		; preds = %bb43
+	br i1 %tmp595, label %bb43, label %cond_true40
+cond_true40:		; preds = %bb46, %cond_true40, %bb36, %entry; lists itself as a predecessor (self-loop)
+	%tmp397 = icmp sgt i32 0, 0		; <i1> [#uses=1]
+	br i1 %tmp397, label %bb43, label %cond_true40
+bb43:		; preds = %cond_true40, %bb36
+	br i1 false, label %bb53, label %bb36
+bb46:		; preds = %bb53
+	br i1 %tmp595, label %bb53, label %cond_true40
+bb53:		; preds = %bb46, %bb43
+	br i1 false, label %bb102, label %bb46
+bb92.preheader:		; preds = %bb102
+	ret void
+bb102:		; preds = %bb53
+	br i1 %tmp595, label %bb148, label %bb92.preheader
+bb148.critedge:		; preds = %entry
+	ret void
+bb148:		; preds = %bb102
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,413 @@
+; RUN: opt < %s -simplifycfg -disable-output
+; END.
+
+define void @main(i32 %c) {		; only function in this test; fed to -simplifycfg with output discarded; body is only icmp, br, ret and unreachable
+entry:
+	%tmp.9 = icmp eq i32 %c, 2		; <i1> [#uses=1]; the only instruction that reads the %c argument
+	br i1 %tmp.9, label %endif.0, label %then.0
+then.0:		; preds = %entry
+	ret void
+endif.0:		; preds = %entry
+	br i1 false, label %then.1, label %endif.1
+then.1:		; preds = %endif.0
+	ret void
+endif.1:		; preds = %endif.0
+	br i1 false, label %then.2, label %endif.2
+then.2:		; preds = %endif.1
+	ret void
+endif.2:		; preds = %endif.1
+	br i1 false, label %then.3, label %loopentry.0
+then.3:		; preds = %endif.2
+	ret void
+loopentry.0:		; preds = %endif.2
+	br i1 false, label %no_exit.0.preheader, label %loopexit.0
+no_exit.0.preheader:		; preds = %loopentry.0
+	br label %no_exit.0
+no_exit.0:		; preds = %endif.4, %no_exit.0.preheader
+	br i1 false, label %then.4, label %endif.4
+then.4:		; preds = %no_exit.0
+	ret void
+endif.4:		; preds = %no_exit.0
+	br i1 false, label %no_exit.0, label %loopexit.0.loopexit
+loopexit.0.loopexit:		; preds = %endif.4
+	br label %loopexit.0
+loopexit.0:		; preds = %loopexit.0.loopexit, %loopentry.0
+	br i1 false, label %then.5, label %loopentry.1
+then.5:		; preds = %loopexit.0
+	ret void
+loopentry.1:		; preds = %loopexit.0
+	%tmp.143 = icmp sgt i32 0, 0		; <i1> [#uses=4]; both operands are constants; reused by four conditional branches below
+	br i1 %tmp.143, label %no_exit.1.preheader, label %loopexit.1
+no_exit.1.preheader:		; preds = %loopentry.1
+	br label %no_exit.1
+no_exit.1:		; preds = %endif.6, %no_exit.1.preheader
+	br i1 false, label %then.6, label %shortcirc_next.3
+shortcirc_next.3:		; preds = %no_exit.1
+	br i1 false, label %then.6, label %shortcirc_next.4
+shortcirc_next.4:		; preds = %shortcirc_next.3
+	br i1 false, label %then.6, label %endif.6
+then.6:		; preds = %shortcirc_next.4, %shortcirc_next.3, %no_exit.1
+	ret void
+endif.6:		; preds = %shortcirc_next.4
+	br i1 false, label %no_exit.1, label %loopexit.1.loopexit
+loopexit.1.loopexit:		; preds = %endif.6
+	br label %loopexit.1
+loopexit.1:		; preds = %loopexit.1.loopexit, %loopentry.1
+	br i1 false, label %then.i, label %loopentry.0.i
+then.i:		; preds = %loopexit.1
+	ret void
+loopentry.0.i:		; preds = %loopexit.1
+	br i1 %tmp.143, label %no_exit.0.i.preheader, label %readvector.exit
+no_exit.0.i.preheader:		; preds = %loopentry.0.i
+	br label %no_exit.0.i
+no_exit.0.i:		; preds = %loopexit.1.i, %no_exit.0.i.preheader
+	br i1 false, label %no_exit.1.i.preheader, label %loopexit.1.i
+no_exit.1.i.preheader:		; preds = %no_exit.0.i
+	br label %no_exit.1.i
+no_exit.1.i:		; preds = %loopexit.2.i, %no_exit.1.i.preheader
+	br i1 false, label %no_exit.2.i.preheader, label %loopexit.2.i
+no_exit.2.i.preheader:		; preds = %no_exit.1.i
+	br label %no_exit.2.i
+no_exit.2.i:		; preds = %no_exit.2.i, %no_exit.2.i.preheader
+	br i1 false, label %no_exit.2.i, label %loopexit.2.i.loopexit
+loopexit.2.i.loopexit:		; preds = %no_exit.2.i
+	br label %loopexit.2.i
+loopexit.2.i:		; preds = %loopexit.2.i.loopexit, %no_exit.1.i
+	br i1 false, label %no_exit.1.i, label %loopexit.1.i.loopexit
+loopexit.1.i.loopexit:		; preds = %loopexit.2.i
+	br label %loopexit.1.i
+loopexit.1.i:		; preds = %loopexit.1.i.loopexit, %no_exit.0.i
+	br i1 false, label %no_exit.0.i, label %readvector.exit.loopexit
+readvector.exit.loopexit:		; preds = %loopexit.1.i
+	br label %readvector.exit
+readvector.exit:		; preds = %readvector.exit.loopexit, %loopentry.0.i
+	br i1 %tmp.143, label %loopentry.1.preheader.i, label %loopexit.0.i
+loopentry.1.preheader.i:		; preds = %readvector.exit
+	br label %loopentry.1.outer.i
+loopentry.1.outer.i:		; preds = %loopexit.1.i110, %loopentry.1.preheader.i
+	br label %loopentry.1.i85
+loopentry.1.i85.loopexit:		; preds = %hamming.exit16.i
+	br label %loopentry.1.i85
+loopentry.1.i85:		; preds = %loopentry.1.i85.loopexit, %loopentry.1.outer.i
+	br i1 false, label %no_exit.1.preheader.i, label %loopexit.1.i110.loopexit1
+no_exit.1.preheader.i:		; preds = %loopentry.1.i85
+	br label %no_exit.1.i87
+no_exit.1.i87:		; preds = %then.1.i107, %no_exit.1.preheader.i
+	br i1 false, label %no_exit.i.i101.preheader, label %hamming.exit.i104
+no_exit.i.i101.preheader:		; preds = %no_exit.1.i87
+	br label %no_exit.i.i101
+no_exit.i.i101:		; preds = %no_exit.i.i101, %no_exit.i.i101.preheader
+	br i1 false, label %no_exit.i.i101, label %hamming.exit.i104.loopexit
+hamming.exit.i104.loopexit:		; preds = %no_exit.i.i101
+	br label %hamming.exit.i104
+hamming.exit.i104:		; preds = %hamming.exit.i104.loopexit, %no_exit.1.i87
+	br i1 false, label %no_exit.i15.i.preheader, label %hamming.exit16.i
+no_exit.i15.i.preheader:		; preds = %hamming.exit.i104
+	br label %no_exit.i15.i
+no_exit.i15.i:		; preds = %no_exit.i15.i, %no_exit.i15.i.preheader
+	br i1 false, label %no_exit.i15.i, label %hamming.exit16.i.loopexit
+hamming.exit16.i.loopexit:		; preds = %no_exit.i15.i
+	br label %hamming.exit16.i
+hamming.exit16.i:		; preds = %hamming.exit16.i.loopexit, %hamming.exit.i104
+	br i1 false, label %loopentry.1.i85.loopexit, label %then.1.i107
+then.1.i107:		; preds = %hamming.exit16.i
+	br i1 false, label %no_exit.1.i87, label %loopexit.1.i110.loopexit
+loopexit.1.i110.loopexit:		; preds = %then.1.i107
+	br label %loopexit.1.i110
+loopexit.1.i110.loopexit1:		; preds = %loopentry.1.i85
+	br label %loopexit.1.i110
+loopexit.1.i110:		; preds = %loopexit.1.i110.loopexit1, %loopexit.1.i110.loopexit
+	br i1 false, label %loopentry.1.outer.i, label %loopexit.0.i.loopexit
+loopexit.0.i.loopexit:		; preds = %loopexit.1.i110
+	br label %loopexit.0.i
+loopexit.0.i:		; preds = %loopexit.0.i.loopexit, %readvector.exit
+	br i1 false, label %UnifiedReturnBlock.i113, label %then.2.i112
+then.2.i112:		; preds = %loopexit.0.i
+	br label %checkham.exit
+UnifiedReturnBlock.i113:		; preds = %loopexit.0.i
+	br label %checkham.exit
+checkham.exit:		; preds = %UnifiedReturnBlock.i113, %then.2.i112
+	br i1 false, label %loopentry.1.i14.preheader, label %loopentry.3.i.preheader
+loopentry.1.i14.preheader:		; preds = %checkham.exit
+	br label %loopentry.1.i14
+loopentry.1.i14:		; preds = %loopexit.1.i18, %loopentry.1.i14.preheader
+	br i1 false, label %no_exit.1.i16.preheader, label %loopexit.1.i18
+no_exit.1.i16.preheader:		; preds = %loopentry.1.i14
+	br label %no_exit.1.i16
+no_exit.1.i16:		; preds = %no_exit.1.i16, %no_exit.1.i16.preheader
+	br i1 false, label %no_exit.1.i16, label %loopexit.1.i18.loopexit
+loopexit.1.i18.loopexit:		; preds = %no_exit.1.i16
+	br label %loopexit.1.i18
+loopexit.1.i18:		; preds = %loopexit.1.i18.loopexit, %loopentry.1.i14
+	br i1 false, label %loopentry.1.i14, label %loopentry.3.i.loopexit
+loopentry.3.i.loopexit:		; preds = %loopexit.1.i18
+	br label %loopentry.3.i.preheader
+loopentry.3.i.preheader:		; preds = %loopentry.3.i.loopexit, %checkham.exit
+	br label %loopentry.3.i
+loopentry.3.i:		; preds = %endif.1.i, %loopentry.3.i.preheader
+	br i1 false, label %loopentry.4.i.preheader, label %endif.1.i
+loopentry.4.i.preheader:		; preds = %loopentry.3.i
+	br label %loopentry.4.i
+loopentry.4.i:		; preds = %loopexit.4.i, %loopentry.4.i.preheader
+	br i1 false, label %no_exit.4.i.preheader, label %loopexit.4.i
+no_exit.4.i.preheader:		; preds = %loopentry.4.i
+	br label %no_exit.4.i
+no_exit.4.i:		; preds = %no_exit.4.i.backedge, %no_exit.4.i.preheader
+	br i1 false, label %endif.0.i, label %else.i
+else.i:		; preds = %no_exit.4.i
+	br i1 false, label %no_exit.4.i.backedge, label %loopexit.4.i.loopexit
+no_exit.4.i.backedge:		; preds = %endif.0.i, %else.i
+	br label %no_exit.4.i
+endif.0.i:		; preds = %no_exit.4.i
+	br i1 false, label %no_exit.4.i.backedge, label %loopexit.4.i.loopexit
+loopexit.4.i.loopexit:		; preds = %endif.0.i, %else.i
+	br label %loopexit.4.i
+loopexit.4.i:		; preds = %loopexit.4.i.loopexit, %loopentry.4.i
+	br i1 false, label %loopentry.4.i, label %endif.1.i.loopexit
+endif.1.i.loopexit:		; preds = %loopexit.4.i
+	br label %endif.1.i
+endif.1.i:		; preds = %endif.1.i.loopexit, %loopentry.3.i
+	%exitcond = icmp eq i32 0, 10		; <i1> [#uses=1]; both operands are constants
+	br i1 %exitcond, label %generateT.exit, label %loopentry.3.i
+generateT.exit:		; preds = %endif.1.i
+	br i1 false, label %then.0.i, label %loopentry.1.i30.preheader
+then.0.i:		; preds = %generateT.exit
+	ret void
+loopentry.1.i30.loopexit:		; preds = %loopexit.3.i
+	br label %loopentry.1.i30.backedge
+loopentry.1.i30.preheader:		; preds = %generateT.exit
+	br label %loopentry.1.i30
+loopentry.1.i30:		; preds = %loopentry.1.i30.backedge, %loopentry.1.i30.preheader
+	br i1 %tmp.143, label %no_exit.0.i31.preheader, label %loopentry.1.i30.backedge
+loopentry.1.i30.backedge:		; preds = %loopentry.1.i30, %loopentry.1.i30.loopexit
+	br label %loopentry.1.i30
+no_exit.0.i31.preheader:		; preds = %loopentry.1.i30
+	br label %no_exit.0.i31
+no_exit.0.i31:		; preds = %loopexit.3.i, %no_exit.0.i31.preheader
+	br i1 false, label %then.1.i, label %else.0.i
+then.1.i:		; preds = %no_exit.0.i31
+	br i1 undef, label %then.0.i29, label %loopentry.0.i31
+then.0.i29:		; preds = %then.1.i
+	unreachable
+loopentry.0.i31:		; preds = %then.1.i
+	br i1 false, label %no_exit.0.i38.preheader, label %loopentry.1.i.preheader
+no_exit.0.i38.preheader:		; preds = %loopentry.0.i31
+	br label %no_exit.0.i38
+no_exit.0.i38:		; preds = %no_exit.0.i38, %no_exit.0.i38.preheader
+	br i1 undef, label %no_exit.0.i38, label %loopentry.1.i.preheader.loopexit
+loopentry.1.i.preheader.loopexit:		; preds = %no_exit.0.i38
+	br label %loopentry.1.i.preheader
+loopentry.1.i.preheader:		; preds = %loopentry.1.i.preheader.loopexit, %loopentry.0.i31
+	br label %loopentry.1.i
+loopentry.1.i:		; preds = %endif.2.i, %loopentry.1.i.preheader
+	br i1 undef, label %loopentry.2.i39.preheader, label %loopexit.1.i79.loopexit2
+loopentry.2.i39.preheader:		; preds = %loopentry.1.i
+	br label %loopentry.2.i39
+loopentry.2.i39:		; preds = %loopexit.5.i77, %loopentry.2.i39.preheader
+	br i1 false, label %loopentry.3.i40.preheader, label %hamming.exit.i71
+loopentry.3.i40.preheader:		; preds = %loopentry.2.i39
+	br label %loopentry.3.i40
+loopentry.3.i40:		; preds = %loopexit.3.i51, %loopentry.3.i40.preheader
+	br i1 false, label %no_exit.3.preheader.i42, label %loopexit.3.i51
+no_exit.3.preheader.i42:		; preds = %loopentry.3.i40
+	br label %no_exit.3.i49
+no_exit.3.i49:		; preds = %no_exit.3.i49, %no_exit.3.preheader.i42
+	br i1 undef, label %no_exit.3.i49, label %loopexit.3.i51.loopexit
+loopexit.3.i51.loopexit:		; preds = %no_exit.3.i49
+	br label %loopexit.3.i51
+loopexit.3.i51:		; preds = %loopexit.3.i51.loopexit, %loopentry.3.i40
+	br i1 undef, label %loopentry.3.i40, label %loopentry.4.i52
+loopentry.4.i52:		; preds = %loopexit.3.i51
+	br i1 false, label %no_exit.4.i54.preheader, label %hamming.exit.i71
+no_exit.4.i54.preheader:		; preds = %loopentry.4.i52
+	br label %no_exit.4.i54
+no_exit.4.i54:		; preds = %no_exit.4.backedge.i, %no_exit.4.i54.preheader
+	br i1 undef, label %then.1.i55, label %endif.1.i56
+then.1.i55:		; preds = %no_exit.4.i54
+	br i1 undef, label %no_exit.4.backedge.i, label %loopexit.4.i57
+no_exit.4.backedge.i:		; preds = %endif.1.i56, %then.1.i55
+	br label %no_exit.4.i54
+endif.1.i56:		; preds = %no_exit.4.i54
+	br i1 undef, label %no_exit.4.backedge.i, label %loopexit.4.i57
+loopexit.4.i57:		; preds = %endif.1.i56, %then.1.i55
+	br i1 false, label %no_exit.i.i69.preheader, label %hamming.exit.i71
+no_exit.i.i69.preheader:		; preds = %loopexit.4.i57
+	br label %no_exit.i.i69
+no_exit.i.i69:		; preds = %no_exit.i.i69, %no_exit.i.i69.preheader
+	br i1 undef, label %no_exit.i.i69, label %hamming.exit.i71.loopexit
+hamming.exit.i71.loopexit:		; preds = %no_exit.i.i69
+	br label %hamming.exit.i71
+hamming.exit.i71:		; preds = %hamming.exit.i71.loopexit, %loopexit.4.i57, %loopentry.4.i52, %loopentry.2.i39
+	br i1 undef, label %endif.2.i, label %loopentry.5.i72
+loopentry.5.i72:		; preds = %hamming.exit.i71
+	br i1 false, label %shortcirc_next.i74.preheader, label %loopexit.5.i77
+shortcirc_next.i74.preheader:		; preds = %loopentry.5.i72
+	br label %shortcirc_next.i74
+shortcirc_next.i74:		; preds = %no_exit.5.i76, %shortcirc_next.i74.preheader
+	br i1 undef, label %no_exit.5.i76, label %loopexit.5.i77.loopexit
+no_exit.5.i76:		; preds = %shortcirc_next.i74
+	br i1 undef, label %shortcirc_next.i74, label %loopexit.5.i77.loopexit
+loopexit.5.i77.loopexit:		; preds = %no_exit.5.i76, %shortcirc_next.i74
+	br label %loopexit.5.i77
+loopexit.5.i77:		; preds = %loopexit.5.i77.loopexit, %loopentry.5.i72
+	br i1 undef, label %loopentry.2.i39, label %loopexit.1.i79.loopexit
+endif.2.i:		; preds = %hamming.exit.i71
+	br label %loopentry.1.i
+loopexit.1.i79.loopexit:		; preds = %loopexit.5.i77
+	br label %loopexit.1.i79
+loopexit.1.i79.loopexit2:		; preds = %loopentry.1.i
+	br label %loopexit.1.i79
+loopexit.1.i79:		; preds = %loopexit.1.i79.loopexit2, %loopexit.1.i79.loopexit
+	br i1 undef, label %then.3.i, label %loopentry.6.i80
+then.3.i:		; preds = %loopexit.1.i79
+	br i1 false, label %no_exit.6.i82.preheader, label %run.exit
+loopentry.6.i80:		; preds = %loopexit.1.i79
+	br i1 false, label %no_exit.6.i82.preheader, label %run.exit
+no_exit.6.i82.preheader:		; preds = %loopentry.6.i80, %then.3.i
+	br label %no_exit.6.i82
+no_exit.6.i82:		; preds = %no_exit.6.i82, %no_exit.6.i82.preheader
+	br i1 undef, label %no_exit.6.i82, label %run.exit.loopexit
+run.exit.loopexit:		; preds = %no_exit.6.i82
+	br label %run.exit
+run.exit:		; preds = %run.exit.loopexit, %loopentry.6.i80, %then.3.i
+	br i1 false, label %no_exit.1.i36.preheader, label %loopentry.3.i37
+else.0.i:		; preds = %no_exit.0.i31
+	br i1 false, label %then.0.i4, label %loopentry.0.i6
+then.0.i4:		; preds = %else.0.i
+	unreachable
+loopentry.0.i6:		; preds = %else.0.i
+	br i1 false, label %no_exit.0.i8.preheader, label %loopentry.2.i.preheader
+no_exit.0.i8.preheader:		; preds = %loopentry.0.i6
+	br label %no_exit.0.i8
+no_exit.0.i8:		; preds = %no_exit.0.i8, %no_exit.0.i8.preheader
+	br i1 false, label %no_exit.0.i8, label %loopentry.2.i.preheader.loopexit
+loopentry.2.i.preheader.loopexit:		; preds = %no_exit.0.i8
+	br label %loopentry.2.i.preheader
+loopentry.2.i.preheader:		; preds = %loopentry.2.i.preheader.loopexit, %loopentry.0.i6
+	br label %loopentry.2.i
+loopentry.2.i:		; preds = %endif.3.i19, %loopentry.2.i.preheader
+	br i1 false, label %loopentry.3.i10.preheader, label %loopentry.4.i15
+loopentry.3.i10.preheader:		; preds = %loopentry.2.i
+	br label %loopentry.3.i10
+loopentry.3.i10:		; preds = %loopexit.3.i14, %loopentry.3.i10.preheader
+	br i1 false, label %no_exit.3.preheader.i, label %loopexit.3.i14
+no_exit.3.preheader.i:		; preds = %loopentry.3.i10
+	br label %no_exit.3.i12
+no_exit.3.i12:		; preds = %no_exit.3.i12, %no_exit.3.preheader.i
+	br i1 false, label %no_exit.3.i12, label %loopexit.3.i14.loopexit
+loopexit.3.i14.loopexit:		; preds = %no_exit.3.i12
+	br label %loopexit.3.i14
+loopexit.3.i14:		; preds = %loopexit.3.i14.loopexit, %loopentry.3.i10
+	br i1 false, label %loopentry.3.i10, label %loopentry.4.i15.loopexit
+loopentry.4.i15.loopexit:		; preds = %loopexit.3.i14
+	br label %loopentry.4.i15
+loopentry.4.i15:		; preds = %loopentry.4.i15.loopexit, %loopentry.2.i
+	br i1 false, label %loopentry.5.outer.i.preheader, label %loopentry.7.i
+loopentry.5.outer.i.preheader:		; preds = %loopentry.4.i15
+	br label %loopentry.5.outer.i
+loopentry.5.outer.i:		; preds = %loopexit.5.i, %loopentry.5.outer.i.preheader
+	br label %loopentry.5.i
+loopentry.5.i:		; preds = %endif.1.i18, %loopentry.5.outer.i
+	br i1 false, label %no_exit.5.i.preheader, label %loopexit.5.i.loopexit3
+no_exit.5.i.preheader:		; preds = %loopentry.5.i
+	br label %no_exit.5.i
+no_exit.5.i:		; preds = %then.2.i, %no_exit.5.i.preheader
+	br i1 false, label %loopentry.6.i, label %endif.1.i18
+loopentry.6.i:		; preds = %no_exit.5.i
+	br i1 false, label %no_exit.6.preheader.i, label %loopexit.6.i
+no_exit.6.preheader.i:		; preds = %loopentry.6.i
+	br label %no_exit.6.i
+no_exit.6.i:		; preds = %no_exit.6.i, %no_exit.6.preheader.i
+	br i1 false, label %no_exit.6.i, label %loopexit.6.i.loopexit
+loopexit.6.i.loopexit:		; preds = %no_exit.6.i
+	br label %loopexit.6.i
+loopexit.6.i:		; preds = %loopexit.6.i.loopexit, %loopentry.6.i
+	br i1 false, label %then.2.i, label %endif.1.i18
+then.2.i:		; preds = %loopexit.6.i
+	br i1 false, label %no_exit.5.i, label %loopexit.5.i.loopexit
+endif.1.i18:		; preds = %loopexit.6.i, %no_exit.5.i
+	br label %loopentry.5.i
+loopexit.5.i.loopexit:		; preds = %then.2.i
+	br label %loopexit.5.i
+loopexit.5.i.loopexit3:		; preds = %loopentry.5.i
+	br label %loopexit.5.i
+loopexit.5.i:		; preds = %loopexit.5.i.loopexit3, %loopexit.5.i.loopexit
+	br i1 false, label %loopentry.5.outer.i, label %loopentry.7.i.loopexit
+loopentry.7.i.loopexit:		; preds = %loopexit.5.i
+	br label %loopentry.7.i
+loopentry.7.i:		; preds = %loopentry.7.i.loopexit, %loopentry.4.i15
+	br i1 false, label %no_exit.7.i.preheader, label %hamming.exit.i
+no_exit.7.i.preheader:		; preds = %loopentry.7.i
+	br label %no_exit.7.i
+no_exit.7.i:		; preds = %no_exit.7.i, %no_exit.7.i.preheader
+	br i1 false, label %no_exit.7.i, label %loopexit.7.i
+loopexit.7.i:		; preds = %no_exit.7.i
+	br i1 false, label %no_exit.i.i.preheader, label %hamming.exit.i
+no_exit.i.i.preheader:		; preds = %loopexit.7.i
+	br label %no_exit.i.i
+no_exit.i.i:		; preds = %no_exit.i.i, %no_exit.i.i.preheader
+	br i1 false, label %no_exit.i.i, label %hamming.exit.i.loopexit
+hamming.exit.i.loopexit:		; preds = %no_exit.i.i
+	br label %hamming.exit.i
+hamming.exit.i:		; preds = %hamming.exit.i.loopexit, %loopexit.7.i, %loopentry.7.i
+	br i1 false, label %endif.3.i19, label %loopentry.8.i
+loopentry.8.i:		; preds = %hamming.exit.i
+	br i1 false, label %shortcirc_next.i.preheader, label %loopexit.8.i
+shortcirc_next.i.preheader:		; preds = %loopentry.8.i
+	br label %shortcirc_next.i
+shortcirc_next.i:		; preds = %no_exit.8.i, %shortcirc_next.i.preheader
+	br i1 false, label %no_exit.8.i, label %loopexit.8.i.loopexit
+no_exit.8.i:		; preds = %shortcirc_next.i
+	br i1 false, label %shortcirc_next.i, label %loopexit.8.i.loopexit
+loopexit.8.i.loopexit:		; preds = %no_exit.8.i, %shortcirc_next.i
+	br label %loopexit.8.i
+loopexit.8.i:		; preds = %loopexit.8.i.loopexit, %loopentry.8.i
+	br i1 false, label %no_exit.9.i.preheader, label %endif.3.i19
+no_exit.9.i.preheader:		; preds = %loopexit.8.i
+	br label %no_exit.9.i
+no_exit.9.i:		; preds = %no_exit.9.i, %no_exit.9.i.preheader
+	br i1 false, label %no_exit.9.i, label %endif.3.i19.loopexit
+endif.3.i19.loopexit:		; preds = %no_exit.9.i
+	br label %endif.3.i19
+endif.3.i19:		; preds = %endif.3.i19.loopexit, %loopexit.8.i, %hamming.exit.i
+	br i1 false, label %loopentry.2.i, label %loopexit.1.i20
+loopexit.1.i20:		; preds = %endif.3.i19
+	br i1 false, label %then.4.i, label %UnifiedReturnBlock.i
+then.4.i:		; preds = %loopexit.1.i20
+	br label %runcont.exit
+UnifiedReturnBlock.i:		; preds = %loopexit.1.i20
+	br label %runcont.exit
+runcont.exit:		; preds = %UnifiedReturnBlock.i, %then.4.i
+	br i1 false, label %no_exit.1.i36.preheader, label %loopentry.3.i37
+no_exit.1.i36.preheader:		; preds = %runcont.exit, %run.exit
+	br label %no_exit.1.i36
+no_exit.1.i36:		; preds = %no_exit.1.i36, %no_exit.1.i36.preheader
+	br i1 false, label %no_exit.1.i36, label %loopentry.3.i37.loopexit
+loopentry.3.i37.loopexit:		; preds = %no_exit.1.i36
+	br label %loopentry.3.i37
+loopentry.3.i37:		; preds = %loopentry.3.i37.loopexit, %runcont.exit, %run.exit
+	br i1 false, label %loopentry.4.i38.preheader, label %loopexit.3.i
+loopentry.4.i38.preheader:		; preds = %loopentry.3.i37
+	br label %loopentry.4.i38
+loopentry.4.i38:		; preds = %loopexit.4.i42, %loopentry.4.i38.preheader
+	br i1 false, label %no_exit.3.i.preheader, label %loopexit.4.i42
+no_exit.3.i.preheader:		; preds = %loopentry.4.i38
+	br label %no_exit.3.i
+no_exit.3.i:		; preds = %no_exit.3.i.backedge, %no_exit.3.i.preheader
+	br i1 false, label %endif.3.i, label %else.1.i
+else.1.i:		; preds = %no_exit.3.i
+	br i1 false, label %no_exit.3.i.backedge, label %loopexit.4.i42.loopexit
+no_exit.3.i.backedge:		; preds = %endif.3.i, %else.1.i
+	br label %no_exit.3.i
+endif.3.i:		; preds = %no_exit.3.i
+	br i1 false, label %no_exit.3.i.backedge, label %loopexit.4.i42.loopexit
+loopexit.4.i42.loopexit:		; preds = %endif.3.i, %else.1.i
+	br label %loopexit.4.i42
+loopexit.4.i42:		; preds = %loopexit.4.i42.loopexit, %loopentry.4.i38
+	br i1 false, label %loopentry.4.i38, label %loopexit.3.i.loopexit
+loopexit.3.i.loopexit:		; preds = %loopexit.4.i42
+	br label %loopexit.3.i
+loopexit.3.i:		; preds = %loopexit.3.i.loopexit, %loopentry.3.i37
+	%tmp.13.i155 = icmp slt i32 0, 0		; <i1> [#uses=1]; both operands are constants
+	br i1 %tmp.13.i155, label %no_exit.0.i31, label %loopentry.1.i30.loopexit
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,96 @@
+; RUN: opt < %s -gvn -simplifycfg -disable-output
+; PR867
+
+target datalayout = "E-p:32:32"
+target triple = "powerpc-unknown-linux-gnu"
+	%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.eh_status = type opaque
+	%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
+	%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+	%struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
+	%struct.initial_value_struct = type opaque
+	%struct.lang_decl = type opaque
+	%struct.lang_type = type opaque
+	%struct.language_function = type opaque
+	%struct.location_t = type { i8*, i32 }
+	%struct.machine_function = type { i32, i32, i8*, i32, i32 }
+	%struct.rtunion = type { i32 }
+	%struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
+	%struct.rtx_def = type { i16, i8, i8, %struct.u }
+	%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
+	%struct.temp_slot = type opaque
+	%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 }
+	%struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+	%struct.tree_decl_u1 = type { i64 }
+	%struct.tree_decl_u2 = type { %struct.function* }
+	%struct.tree_node = type { %struct.tree_decl }
+	%struct.tree_type = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32, i16, i8, i8, i32, %struct.tree_node*, %struct.tree_node*, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_type* }
+	%struct.u = type { [1 x i64] }
+	%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+	%struct.varasm_status = type opaque
+	%struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
+	%union.tree_ann_d = type opaque
+@mode_class = external global [35 x i8]		; <[35 x i8]*> [#uses=3]
+
+define void @fold_builtin_classify() {		; crash reproducer for PR867; the output is discarded, opt only has to finish
+entry:
+	%tmp63 = load i32, i32* null		; <i32> [#uses=1]
+	switch i32 %tmp63, label %bb276 [
+		 i32 414, label %bb145
+		 i32 417, label %bb
+	]
+bb:		; preds = %entry
+	ret void
+bb145:		; preds = %entry -- 1st of three look-alike blocks: index @mode_class with bits 16..22 of a loaded word, compare the byte with 4
+	%tmp146 = load %struct.tree_node*, %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
+	%tmp148 = getelementptr %struct.tree_node, %struct.tree_node* %tmp146, i32 0, i32 0, i32 0, i32 1		; <%struct.tree_node**> [#uses=1]
+	%tmp149 = load %struct.tree_node*, %struct.tree_node** %tmp148		; <%struct.tree_node*> [#uses=1]
+	%tmp150 = bitcast %struct.tree_node* %tmp149 to %struct.tree_type*		; <%struct.tree_type*> [#uses=1]
+	%tmp151 = getelementptr %struct.tree_type, %struct.tree_type* %tmp150, i32 0, i32 6		; <i16*> [#uses=1]
+	%tmp151.upgrd.1 = bitcast i16* %tmp151 to i32*		; <i32*> [#uses=1]
+	%tmp152 = load i32, i32* %tmp151.upgrd.1		; <i32> [#uses=1]
+	%tmp154 = lshr i32 %tmp152, 16		; <i32> [#uses=1]
+	%tmp154.mask = and i32 %tmp154, 127		; <i32> [#uses=1]
+	%gep.upgrd.2 = zext i32 %tmp154.mask to i64		; <i64> [#uses=1]
+	%tmp155 = getelementptr [35 x i8], [35 x i8]* @mode_class, i32 0, i64 %gep.upgrd.2		; <i8*> [#uses=1]
+	%tmp156 = load i8, i8* %tmp155		; <i8> [#uses=1]
+	%tmp157 = icmp eq i8 %tmp156, 4		; <i1> [#uses=1]
+	br i1 %tmp157, label %cond_next241, label %cond_true158
+cond_true158:		; preds = %bb145 -- 2nd look-alike block, same address computation, compares the byte with 8
+	%tmp172 = load %struct.tree_node*, %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
+	%tmp174 = getelementptr %struct.tree_node, %struct.tree_node* %tmp172, i32 0, i32 0, i32 0, i32 1		; <%struct.tree_node**> [#uses=1]
+	%tmp175 = load %struct.tree_node*, %struct.tree_node** %tmp174		; <%struct.tree_node*> [#uses=1]
+	%tmp176 = bitcast %struct.tree_node* %tmp175 to %struct.tree_type*		; <%struct.tree_type*> [#uses=1]
+	%tmp177 = getelementptr %struct.tree_type, %struct.tree_type* %tmp176, i32 0, i32 6		; <i16*> [#uses=1]
+	%tmp177.upgrd.3 = bitcast i16* %tmp177 to i32*		; <i32*> [#uses=1]
+	%tmp178 = load i32, i32* %tmp177.upgrd.3		; <i32> [#uses=1]
+	%tmp180 = lshr i32 %tmp178, 16		; <i32> [#uses=1]
+	%tmp180.mask = and i32 %tmp180, 127		; <i32> [#uses=1]
+	%gep.upgrd.4 = zext i32 %tmp180.mask to i64		; <i64> [#uses=1]
+	%tmp181 = getelementptr [35 x i8], [35 x i8]* @mode_class, i32 0, i64 %gep.upgrd.4		; <i8*> [#uses=1]
+	%tmp182 = load i8, i8* %tmp181		; <i8> [#uses=1]
+	%tmp183 = icmp eq i8 %tmp182, 8		; <i1> [#uses=1]
+	br i1 %tmp183, label %cond_next241, label %cond_true184
+cond_true184:		; preds = %cond_true158 -- 3rd look-alike block, compares the byte with 4 again
+	%tmp185 = load %struct.tree_node*, %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
+	%tmp187 = getelementptr %struct.tree_node, %struct.tree_node* %tmp185, i32 0, i32 0, i32 0, i32 1		; <%struct.tree_node**> [#uses=1]
+	%tmp188 = load %struct.tree_node*, %struct.tree_node** %tmp187		; <%struct.tree_node*> [#uses=1]
+	%tmp189 = bitcast %struct.tree_node* %tmp188 to %struct.tree_type*		; <%struct.tree_type*> [#uses=1]
+	%tmp190 = getelementptr %struct.tree_type, %struct.tree_type* %tmp189, i32 0, i32 6		; <i16*> [#uses=1]
+	%tmp190.upgrd.5 = bitcast i16* %tmp190 to i32*		; <i32*> [#uses=1]
+	%tmp191 = load i32, i32* %tmp190.upgrd.5		; <i32> [#uses=1]
+	%tmp193 = lshr i32 %tmp191, 16		; <i32> [#uses=1]
+	%tmp193.mask = and i32 %tmp193, 127		; <i32> [#uses=1]
+	%gep.upgrd.6 = zext i32 %tmp193.mask to i64		; <i64> [#uses=1]
+	%tmp194 = getelementptr [35 x i8], [35 x i8]* @mode_class, i32 0, i64 %gep.upgrd.6		; <i8*> [#uses=1]
+	%tmp195 = load i8, i8* %tmp194		; <i8> [#uses=1]
+	%tmp196 = icmp eq i8 %tmp195, 4		; <i1> [#uses=1]
+	br i1 %tmp196, label %cond_next241, label %cond_true197
+cond_true197:		; preds = %cond_true184
+	ret void
+cond_next241:		; preds = %cond_true184, %cond_true158, %bb145 -- common target of all three successful compares
+	%tmp245 = load i32, i32* null		; <i32> [#uses=0]
+	ret void
+bb276:		; preds = %entry
+	ret void
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; PR957
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: select
+
+@G = extern_weak global i32
+
+define i32 @test(i32 %tmp) {		; the file-level check requires that no select appears in the output for this diamond
+cond_false179:
+	%tmp181 = icmp eq i32 %tmp, 0		; <i1> [#uses=1]
+	br i1 %tmp181, label %cond_true182, label %cond_next185
+cond_true182:		; preds = %cond_false179
+	br label %cond_next185
+cond_next185:		; preds = %cond_true182, %cond_false179
+	%d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ]		; <i32> [#uses=1] -- the incoming udiv is a constant expression dividing by the address of extern_weak @G; presumably it may trap if @G resolves to null (confirm against PR957)
+	ret i32 %d0.3
+}
+
+define i32 @test2(i32 %tmp) {		; same diamond as @test, but the merge block also contains a call before the return
+cond_false179:
+	%tmp181 = icmp eq i32 %tmp, 0		; <i1> [#uses=1]
+	br i1 %tmp181, label %cond_true182, label %cond_next185
+cond_true182:		; preds = %cond_false179
+	br label %cond_next185
+cond_next185:		; preds = %cond_true182, %cond_false179
+	%d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ]		; <i32> [#uses=1] -- constant-expression udiv by the address of @G, as in @test
+	call i32 @test( i32 4 )		; <i32>:0 [#uses=0] -- result is unused
+	ret i32 %d0.3
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,131 @@
+; RUN: opt < %s -simplifycfg | llvm-dis
+; END.
+
+; ModuleID = '2006-12-08-Ptr-ICmp-Branch.ll'
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+	%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+	%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+	%struct.charsequence = type { i8*, i32, i32 }
+	%struct.trie_s = type { [26 x %struct.trie_s*], i32 }
+@str = external global [14 x i8]		; <[14 x i8]*> [#uses=0]
+@str.upgrd.1 = external global [32 x i8]		; <[32 x i8]*> [#uses=0]
+@str.upgrd.2 = external global [12 x i8]		; <[12 x i8]*> [#uses=0]
+@C.0.2294 = external global %struct.charsequence		; <%struct.charsequence*> [#uses=3]
+@t = external global %struct.trie_s*		; <%struct.trie_s**> [#uses=0]
+@str.upgrd.3 = external global [3 x i8]		; <[3 x i8]*> [#uses=0]
+@str.upgrd.4 = external global [26 x i8]		; <[26 x i8]*> [#uses=0]
+
+declare void @charsequence_reset(%struct.charsequence*)
+
+declare void @free(i8*)
+
+declare void @charsequence_push(%struct.charsequence*, i8)
+
+declare i8* @charsequence_val(%struct.charsequence*)
+
+declare i32 @_IO_getc(%struct.FILE*)
+
+declare i32 @tolower(i32)
+
+declare %struct.trie_s* @trie_insert(%struct.trie_s*, i8*)
+
+declare i32 @feof(%struct.FILE*)
+
+define void @addfile(%struct.trie_s* %t, %struct.FILE* %f) {		; the test only pipes the -simplifycfg result into llvm-dis; nothing is pattern-matched
+entry:
+	%t_addr = alloca %struct.trie_s*		; <%struct.trie_s**> [#uses=2]
+	%f_addr = alloca %struct.FILE*		; <%struct.FILE**> [#uses=3]
+	%c = alloca i8, align 1		; <i8*> [#uses=7]
+	%wstate = alloca i32, align 4		; <i32*> [#uses=4]
+	%cs = alloca %struct.charsequence, align 16		; <%struct.charsequence*> [#uses=7]
+	%str = alloca i8*, align 4		; <i8**> [#uses=3]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	store %struct.trie_s* %t, %struct.trie_s** %t_addr
+	store %struct.FILE* %f, %struct.FILE** %f_addr
+	store i32 0, i32* %wstate
+	%tmp = getelementptr %struct.charsequence, %struct.charsequence* %cs, i64 0, i32 0		; <i8**> [#uses=1]
+	%tmp1 = getelementptr %struct.charsequence, %struct.charsequence* @C.0.2294, i64 0, i32 0		; <i8**> [#uses=1]
+	%tmp.upgrd.5 = load i8*, i8** %tmp1		; <i8*> [#uses=1]
+	store i8* %tmp.upgrd.5, i8** %tmp
+	%tmp.upgrd.6 = getelementptr %struct.charsequence, %struct.charsequence* %cs, i64 0, i32 1		; <i32*> [#uses=1]
+	%tmp2 = getelementptr %struct.charsequence, %struct.charsequence* @C.0.2294, i64 0, i32 1		; <i32*> [#uses=1]
+	%tmp.upgrd.7 = load i32, i32* %tmp2		; <i32> [#uses=1]
+	store i32 %tmp.upgrd.7, i32* %tmp.upgrd.6
+	%tmp3 = getelementptr %struct.charsequence, %struct.charsequence* %cs, i64 0, i32 2		; <i32*> [#uses=1]
+	%tmp4 = getelementptr %struct.charsequence, %struct.charsequence* @C.0.2294, i64 0, i32 2		; <i32*> [#uses=1]
+	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
+	store i32 %tmp5, i32* %tmp3
+	br label %bb33
+bb:		; preds = %bb33 -- loop body: read one character, lower-case it, store it in %c, then dispatch on %wstate
+	%tmp.upgrd.8 = load %struct.FILE*, %struct.FILE** %f_addr		; <%struct.FILE*> [#uses=1]
+	%tmp.upgrd.9 = call i32 @_IO_getc( %struct.FILE* %tmp.upgrd.8 )		; <i32> [#uses=1]
+	%tmp6 = call i32 @tolower( i32 %tmp.upgrd.9 )		; <i32> [#uses=1]
+	%tmp6.upgrd.10 = trunc i32 %tmp6 to i8		; <i8> [#uses=1]
+	store i8 %tmp6.upgrd.10, i8* %c
+	%tmp7 = load i32, i32* %wstate		; <i32> [#uses=1]
+	%tmp.upgrd.11 = icmp ne i32 %tmp7, 0		; <i1> [#uses=1]
+	br i1 %tmp.upgrd.11, label %cond_true, label %cond_false
+cond_true:		; preds = %bb -- %wstate != 0: a character <= 96 or > 122 ends the current sequence (bb16), anything else is pushed
+	%tmp.upgrd.12 = load i8, i8* %c		; <i8> [#uses=1]
+	%tmp8 = icmp sle i8 %tmp.upgrd.12, 96		; <i1> [#uses=1]
+	br i1 %tmp8, label %cond_true9, label %cond_next
+cond_true9:		; preds = %cond_true
+	br label %bb16
+cond_next:		; preds = %cond_true
+	%tmp10 = load i8, i8* %c		; <i8> [#uses=1]
+	%tmp11 = icmp sgt i8 %tmp10, 122		; <i1> [#uses=1]
+	br i1 %tmp11, label %cond_true12, label %cond_next13
+cond_true12:		; preds = %cond_next
+	br label %bb16
+cond_next13:		; preds = %cond_next
+	%tmp14 = load i8, i8* %c		; <i8> [#uses=1]
+	%tmp14.upgrd.13 = sext i8 %tmp14 to i32		; <i32> [#uses=1]
+	%tmp1415 = trunc i32 %tmp14.upgrd.13 to i8		; <i8> [#uses=1]
+	call void @charsequence_push( %struct.charsequence* %cs, i8 %tmp1415 )
+	br label %bb21
+bb16:		; preds = %cond_true12, %cond_true9 -- pass the sequence value to @trie_insert, free it, reset %wstate to 0
+	%tmp17 = call i8* @charsequence_val( %struct.charsequence* %cs )		; <i8*> [#uses=1]
+	store i8* %tmp17, i8** %str
+	%tmp.upgrd.14 = load %struct.trie_s*, %struct.trie_s** %t_addr		; <%struct.trie_s*> [#uses=1]
+	%tmp18 = load i8*, i8** %str		; <i8*> [#uses=1]
+	%tmp19 = call %struct.trie_s* @trie_insert( %struct.trie_s* %tmp.upgrd.14, i8* %tmp18 )		; <%struct.trie_s*> [#uses=0]
+	%tmp20 = load i8*, i8** %str		; <i8*> [#uses=1]
+	call void @free( i8* %tmp20 )
+	store i32 0, i32* %wstate
+	br label %bb21
+bb21:		; preds = %bb16, %cond_next13
+	br label %cond_next32
+cond_false:		; preds = %bb -- %wstate == 0: a character in 97..122 resets the sequence, is pushed, and sets %wstate to 1
+	%tmp22 = load i8, i8* %c		; <i8> [#uses=1]
+	%tmp23 = icmp sgt i8 %tmp22, 96		; <i1> [#uses=1]
+	br i1 %tmp23, label %cond_true24, label %cond_next31
+cond_true24:		; preds = %cond_false
+	%tmp25 = load i8, i8* %c		; <i8> [#uses=1]
+	%tmp26 = icmp sle i8 %tmp25, 122		; <i1> [#uses=1]
+	br i1 %tmp26, label %cond_true27, label %cond_next30
+cond_true27:		; preds = %cond_true24
+	call void @charsequence_reset( %struct.charsequence* %cs )
+	%tmp28 = load i8, i8* %c		; <i8> [#uses=1]
+	%tmp28.upgrd.15 = sext i8 %tmp28 to i32		; <i32> [#uses=1]
+	%tmp2829 = trunc i32 %tmp28.upgrd.15 to i8		; <i8> [#uses=1]
+	call void @charsequence_push( %struct.charsequence* %cs, i8 %tmp2829 )
+	store i32 1, i32* %wstate
+	br label %cond_next30
+cond_next30:		; preds = %cond_true27, %cond_true24
+	br label %cond_next31
+cond_next31:		; preds = %cond_next30, %cond_false
+	br label %cond_next32
+cond_next32:		; preds = %cond_next31, %bb21
+	br label %bb33
+bb33:		; preds = %cond_next32, %entry -- loop header: continue into %bb while @feof returns 0
+	%tmp34 = load %struct.FILE*, %struct.FILE** %f_addr		; <%struct.FILE*> [#uses=1]
+	%tmp35 = call i32 @feof( %struct.FILE* %tmp34 )		; <i32> [#uses=1]
+	%tmp36 = icmp eq i32 %tmp35, 0		; <i1> [#uses=1]
+	br i1 %tmp36, label %bb, label %bb37
+bb37:		; preds = %bb33
+	br label %return
+return:		; preds = %bb37
+	ret void
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,20 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: invoke
+
+declare i32 @func(i8*) nounwind
+
+define i32 @test() personality i32 (...)* @__gxx_personality_v0 {		; @func is declared nounwind; the file-level check forbids the invoke from surviving in the output
+	invoke i32 @func( i8* null )
+			to label %Cont unwind label %Other		; <i32>:1 [#uses=0]
+
+Cont:		; preds = %0
+	ret i32 0
+
+Other:		; preds = %0 -- unwind destination of the invoke above
+	landingpad { i8*, i32 }
+		catch i8* null
+	ret i32 1
+}
+
+declare i32 @__gxx_personality_v0(...)

Added: llvm/trunk/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,37 @@
+;RUN: opt < %s -simplifycfg -disable-output
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+define i32 @bork() nounwind  {		; crash reproducer: output is discarded, the pass only has to finish
+entry:
+	br label %bb5.outer
+
+bb5.outer.loopexit:		; preds = %bb5
+	br label %bb5.outer
+
+bb5.outer:		; preds = %bb5.outer.loopexit, %entry
+	%undo.0.ph = phi i32 [ 0, %entry ], [ 1, %bb5.outer.loopexit ]		; <i32> [#uses=1]
+	br label %bb5
+
+bb5:		; preds = %bb5, %bb5.outer -- case 102 loops back here directly, case 110 goes around through bb5.outer.loopexit
+	%tmp6 = tail call i32 (...) @foo( ) nounwind 		; <i32> [#uses=1]
+	switch i32 %tmp6, label %bb13 [
+		 i32 -1, label %bb10
+		 i32 102, label %bb5
+		 i32 110, label %bb5.outer.loopexit
+	]
+
+bb10:		; preds = %bb5
+	%tmp12 = tail call i32 (...) @bar( i32 %undo.0.ph ) nounwind 		; <i32> [#uses=0]
+	br label %UnifiedReturnBlock
+
+bb13:		; preds = %bb5
+	br label %UnifiedReturnBlock
+
+UnifiedReturnBlock:		; preds = %bb13, %bb10
+	%UnifiedRetVal = phi i32 [ 1, %bb10 ], [ 258, %bb13 ]		; <i32> [#uses=1]
+	ret i32 %UnifiedRetVal
+}
+
+declare i32 @foo(...)
+
+declare i32 @bar(...)

Added: llvm/trunk/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; The phi should not be eliminated in this case, because the divide op could trap.
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+@G = weak global i32 0, align 8		; <i32*> [#uses=2]
+
+define void @test(i32 %X, i32 %Y, i32 %Z) {		; stores either the loaded value of @G or that value divided by %Z back into @G
+entry:
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%tmp = load i32, i32* @G, align 8		; <i32> [#uses=2]
+	%tmp3 = icmp eq i32 %X, %Y		; <i1> [#uses=1]
+	%tmp34 = zext i1 %tmp3 to i8		; <i8> [#uses=1]
+	%toBool = icmp ne i8 %tmp34, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %cond_true, label %cond_next
+
+cond_true:		; preds = %entry -- only reached when %X == %Y
+	%tmp7 = udiv i32 %tmp, %Z		; <i32> [#uses=1] -- the divide that, per the note at the top of the file, could trap
+	br label %cond_next
+
+cond_next:		; preds = %cond_true, %entry
+; CHECK: = phi i32
+	%F.0 = phi i32 [ %tmp, %entry ], [ %tmp7, %cond_true ]		; <i32> [#uses=1]
+	store i32 %F.0, i32* @G, align 8
+	ret void
+}
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,131 @@
+; RUN: opt < %s -simplifycfg -S > %t
+; RUN: not grep "^BB.tomerge" %t
+; RUN: grep "^BB.nomerge" %t | count 4
+
+; ModuleID = '<stdin>' 
+declare i1 @foo()
+
+declare i1 @bar(i32)
+
+; This function can't be merged: BB.nomerge has to survive in the output
+define void @a() {
+entry:
+	br label %BB.nomerge
+
+BB.nomerge:		; preds = %Common, %entry
+        ; For the %Common edge this phi yields 0 while the phi below yields 2,
+        ; so the values conflict and the blocks can't be merged.
+	%a = phi i32 [ 1, %entry ], [ 0, %Common ]		; <i32> [#uses=1]
+	br label %Succ
+
+Succ:		; preds = %Common, %BB.nomerge
+	%b = phi i32 [ %a, %BB.nomerge ], [ 2, %Common ]		; <i32> [#uses=0]
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Common, label %Exit
+
+Common:		; preds = %Succ
+	%cond = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond, label %BB.nomerge, label %Succ
+
+Exit:		; preds = %Succ
+	ret void
+}
+
+; This function can't be merged: BB.nomerge has to survive in the output
+define void @b() {
+entry:
+	br label %BB.nomerge
+
+BB.nomerge:		; preds = %Common, %entry
+	br label %Succ
+
+Succ:		; preds = %Common, %BB.nomerge
+        ; This phi has conflicting values for Common (2) and, through
+        ; BB.nomerge, Common (1), so the blocks can't be merged
+	%b = phi i32 [ 1, %BB.nomerge ], [ 2, %Common ]		; <i32> [#uses=0]
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Common, label %Exit
+
+Common:		; preds = %Succ
+	%cond = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond, label %BB.nomerge, label %Succ
+
+Exit:		; preds = %Succ
+	ret void
+}
+
+; This function can't be merged (for keeping canonical loop structures)
+define void @c() {
+entry:
+	br label %BB.nomerge
+
+BB.nomerge:		; preds = %Common, %entry
+	br label %Succ
+
+Succ:		; preds = %Common, %BB.nomerge, %Pre-Exit
+        ; This phi has identical values (1) for Common and, through
+        ; BB.nomerge, Common; the blocks still can't be merged (see above)
+	%b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Common, label %Pre-Exit
+
+Common:		; preds = %Succ
+	%cond = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond, label %BB.nomerge, label %Succ
+
+Pre-Exit:       ; preds = %Succ
+        ; This adds a backedge, so the %b phi node gets a third branch and is
+        ; not completely trivial
+	%cond2 = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond2, label %Succ, label %Exit
+        
+Exit:		; preds = %Pre-Exit
+	ret void
+}
+
+; This function can't be merged (for keeping canonical loop structures)
+define void @d() {
+entry:
+	br label %BB.nomerge
+
+BB.nomerge:		; preds = %Common, %entry
+        ; This phi has a matching value (0) with below phi (0) on the %Common
+        ; edge, so the phis do not conflict; BB.nomerge is still expected to stay.
+	%a = phi i32 [ 1, %entry ], [ 0, %Common ]		; <i32> [#uses=1]
+	br label %Succ
+
+Succ:		; preds = %Common, %BB.nomerge
+	%b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ]		; <i32> [#uses=0]
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Common, label %Exit
+
+Common:		; preds = %Succ
+	%cond = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %cond, label %BB.nomerge, label %Succ
+
+Exit:		; preds = %Succ
+	ret void
+}
+
+; This function can be merged: no BB.tomerge label may remain in the output
+define void @e() {
+entry:
+	br label %Succ
+
+Succ:		; preds = %Use, %entry
+        ; This phi is used somewhere else than Succ, but this should not prevent
+        ; merging this block
+	%a = phi i32 [ 1, %entry ], [ 0, %Use ]		; <i32> [#uses=1]
+	br label %BB.tomerge
+
+BB.tomerge:		; preds = %Succ
+	%conde = call i1 @foo( )		; <i1> [#uses=1]
+	br i1 %conde, label %Use, label %Exit
+
+Use:		; preds = %BB.tomerge
+	%cond = call i1 @bar( i32 %a )		; <i1> [#uses=1]
+	br i1 %cond, label %Succ, label %Exit
+
+Exit:		; preds = %Use, %BB.tomerge
+	ret void
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+; PR2540
+; Outval should end up with a select from 0/2, not all constants.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+@g_37 = common global i32 0		; <i32*> [#uses=1]
+@.str = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=1]
+
+define i32 @main() nounwind  {
+; CHECK-LABEL: @main(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L:%.*]] = load i32, i32* @g_37, align 4
+; CHECK-NEXT:    [[CMPA:%.*]] = icmp ne i32 [[L]], 0
+; CHECK-NEXT:    br i1 [[CMPA]], label %func_1.exit, label %mooseblock
+; CHECK:       mooseblock:
+; CHECK-NEXT:    [[CMPB:%.*]] = icmp eq i1 [[CMPA]], false
+; CHECK-NEXT:    [[BRMERGE:%.*]] = or i1 [[CMPB]], [[CMPA]]
+; CHECK-NEXT:    [[DOTMUX:%.*]] = select i1 [[CMPB]], i32 0, i32 2
+; CHECK-NEXT:    br i1 [[BRMERGE]], label %func_1.exit, label %infloop
+; CHECK:       func_1.exit:
+; CHECK-NEXT:    [[OUTVAL:%.*]] = phi i32 [ 1, %entry ], [ [[DOTMUX]], %mooseblock ]
+; CHECK-NEXT:    [[POUT:%.*]] = tail call i32 (i8*, ...) @printf
+; CHECK-NEXT:    ret i32 0
+; CHECK:       infloop:
+; CHECK-NEXT:    br label %infloop
+;
+entry:
+  %l = load i32, i32* @g_37, align 4		; <i32> [#uses=1]
+  %cmpa = icmp ne i32 %l, 0		; <i1> [#uses=3]
+  br i1 %cmpa, label %func_1.exit, label %mooseblock
+
+mooseblock:		; preds = %entry -- only entered when %cmpa is false
+  %cmpb = icmp eq i1 %cmpa, false		; <i1> [#uses=2]
+  br i1 %cmpb, label %monkeyblock, label %beeblock
+
+monkeyblock:		; preds = %monkeyblock, %mooseblock -- self-loop, left only when %cmpb is true
+  br i1 %cmpb, label %cowblock, label %monkeyblock
+
+beeblock:		; preds = %beeblock, %mooseblock -- self-loop, left only when %cmpa is true
+  br i1 %cmpa, label %cowblock, label %beeblock
+
+cowblock:		; preds = %beeblock, %monkeyblock -- source of the 0/2 choice that the expected output turns into a select
+  %cowval = phi i32 [ 2, %beeblock ], [ 0, %monkeyblock ]		; <i32> [#uses=1]
+  br label %func_1.exit
+
+func_1.exit:		; preds = %cowblock, %entry
+  %outval = phi i32 [ %cowval, %cowblock ], [ 1, %entry ]		; <i32> [#uses=1]
+  %pout = tail call i32 (i8*, ...) @printf( i8* noalias  getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %outval ) nounwind 		; <i32> [#uses=0]
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...) nounwind
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,60 @@
+; RUN: opt < %s -simplifycfg -disable-output
+; PR 2777
+@g_103 = common global i32 0		; <i32*> [#uses=1]
+
+define i32 @func_127(i32 %p_129) nounwind {		; crash reproducer (PR 2777): output is discarded, the pass only has to finish
+entry:
+	load i32, i32* @g_103, align 4		; <i32>:0 [#uses=1]
+	icmp eq i32 %0, 0		; <i1>:1 [#uses=2]
+	br i1 %1, label %bb6.preheader, label %entry.return_crit_edge
+
+entry.return_crit_edge:		; preds = %entry
+	br label %return
+
+bb6.preheader:		; preds = %entry -- branches on %1 a second time; entry only gets here on the true edge of the same condition
+	br i1 %1, label %bb6.preheader.split.us, label %bb6.preheader.split
+
+bb6.preheader.split.us:		; preds = %bb6.preheader
+	br label %return.loopexit.split
+
+bb6.preheader.split:		; preds = %bb6.preheader
+	br label %bb6
+
+bb6:		; preds = %bb17.bb6_crit_edge, %bb6.preheader.split
+	%indvar35 = phi i32 [ 0, %bb6.preheader.split ], [ %indvar.next36, %bb17.bb6_crit_edge ]		; <i32> [#uses=1]
+	%p_129_addr.3.reg2mem.0 = phi i32 [ %p_129_addr.2, %bb17.bb6_crit_edge ], [ %p_129, %bb6.preheader.split ]		; <i32> [#uses=3]
+	icmp eq i32 %p_129_addr.3.reg2mem.0, 0		; <i1>:2 [#uses=1]
+	br i1 %2, label %bb6.bb17_crit_edge, label %bb8
+
+bb6.bb17_crit_edge:		; preds = %bb6
+	br label %bb17
+
+bb8:		; preds = %bb6
+	br label %bb13
+
+bb13:		; preds = %bb8
+	br label %bb17
+
+bb17:		; preds = %bb13, %bb6.bb17_crit_edge -- both incoming edges carry the same value into the phi below
+	%p_129_addr.2 = phi i32 [ %p_129_addr.3.reg2mem.0, %bb13 ], [ %p_129_addr.3.reg2mem.0, %bb6.bb17_crit_edge ]		; <i32> [#uses=1]
+	%indvar.next36 = add i32 %indvar35, 1		; <i32> [#uses=2]
+	%exitcond37 = icmp eq i32 %indvar.next36, -1		; <i1> [#uses=1]
+	br i1 %exitcond37, label %return.loopexit, label %bb17.bb6_crit_edge
+
+bb17.bb6_crit_edge:		; preds = %bb17
+	br label %bb6
+
+return.loopexit:		; preds = %bb17
+	br label %return.loopexit.split
+
+return.loopexit.split:		; preds = %return.loopexit, %bb6.preheader.split.us
+	br label %return
+
+return:		; preds = %return.loopexit.split, %entry.return_crit_edge
+	ret i32 1
+}
+
+define i32 @func_135(i8 zeroext %p_137, i32 %p_138, i32 %p_140) nounwind {		; single-block function: ignores all three parameters
+entry:
+	ret i32 undef
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,18 @@
+; RUN: opt < %s -simplifycfg -disable-output
+; PR 2800
+
+define void @foo() {		; crash reproducer (PR 2800): output is discarded, the pass only has to finish
+start:
+	%tmp = call i1 @bar( )		; <i1> [#uses=4]
+	br i1 %tmp, label %brtrue, label %brfalse
+
+brtrue:		; preds = %start -- its only instruction combines the branch condition %tmp with itself
+	%tmpnew = and i1 %tmp, %tmp		; <i1> [#uses=1]
+	br label %brfalse
+
+brfalse:		; preds = %brtrue, %start
+	%andandtmp.0 = phi i1 [ %tmp, %start ], [ %tmpnew, %brtrue ]		; <i1> [#uses=0]
+	ret void
+}
+
+declare i1 @bar()

Added: llvm/trunk/test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,36 @@
+; RUN: opt < %s -simplifycfg
+; PR2855
+
+define i32 @_Z1fPii(i32* %b, i32 %f) nounwind {		; reproducer for PR2855; no block returns, every path leads back to %bb
+entry:
+	br label %bb
+
+bb:		; preds = %bb9, %bb7, %entry
+	%__c2.2 = phi i32 [ undef, %entry ], [ %__c2.1, %bb7 ], [ %__c2.1, %bb9 ]		; <i32> [#uses=2]
+	%s.0 = phi i32 [ 0, %entry ], [ 0, %bb7 ], [ %2, %bb9 ]		; <i32> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb
+	%0 = icmp slt i32 0, %f		; <i1> [#uses=1]
+	br i1 %0, label %bb3, label %bb6
+
+bb3:		; preds = %bb1
+	%1 = icmp eq i32 0, 0		; <i1> [#uses=1]
+	br i1 %1, label %bb6, label %bb5
+
+bb5:		; preds = %bb3
+	br label %bb7
+
+bb6:		; preds = %bb3, %bb1
+	%__c2.0 = phi i32 [ 0, %bb3 ], [ %__c2.2, %bb1 ]		; <i32> [#uses=1]
+	br label %bb7
+
+bb7:		; preds = %bb6, %bb5 -- the branch condition is a phi of constants: false from %bb5, true from %bb6
+	%__c2.1 = phi i32 [ 0, %bb5 ], [ %__c2.0, %bb6 ]		; <i32> [#uses=2]
+	%iftmp.1.0 = phi i1 [ false, %bb5 ], [ true, %bb6 ]		; <i1> [#uses=1]
+	br i1 %iftmp.1.0, label %bb, label %bb9
+
+bb9:		; preds = %bb7
+	%2 = add i32 %s.0, 2		; <i32> [#uses=1]
+	br label %bb
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,13 @@
+; RUN: opt < %s -simplifycfg | llvm-dis
+define i32 @test() {		; branches on a PHI that has exactly one incoming edge
+entry:
+	br label %T
+T:		; preds = %entry
+	%C = phi i1 [false, %entry] 		; single-entry PHI: the only incoming value is false
+	br i1 %C, label %X, label %Y
+X:		; preds = %T
+	ret i32 2
+Y:		; preds = %T
+	add i32 1, 2		; unnamed result that no later instruction references
+	ret i32 1
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,46 @@
+; RUN: opt < %s -simplifycfg -S | not grep icmp
+; ModuleID = '/tmp/x.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-pc-linux-gnu"
+
+define i32 @x(i32 %x) {		; compares %x with 8 and then 9, each time with eq followed by the matching ne; the test's run line expects no icmp in the output
+entry:
+	%cmp = icmp eq i32 %x, 8		; <i1> [#uses=1]
+	br i1 %cmp, label %ifthen, label %ifend
+
+ifthen:		; preds = %entry
+	%call = call i32 (...) @foo()		; <i32> [#uses=0]
+	br label %ifend
+
+ifend:		; preds = %ifthen, %entry
+	%cmp2 = icmp ne i32 %x, 8		; <i1> [#uses=1] -- logical inverse of %cmp
+	br i1 %cmp2, label %ifthen3, label %ifend5
+
+ifthen3:		; preds = %ifend
+	%call4 = call i32 (...) @foo()		; <i32> [#uses=0]
+	br label %ifend5
+
+ifend5:		; preds = %ifthen3, %ifend
+	%cmp7 = icmp eq i32 %x, 9		; <i1> [#uses=1]
+	br i1 %cmp7, label %ifthen8, label %ifend10
+
+ifthen8:		; preds = %ifend5
+	%call9 = call i32 (...) @bar()		; <i32> [#uses=0]
+	br label %ifend10
+
+ifend10:		; preds = %ifthen8, %ifend5
+	%cmp12 = icmp ne i32 %x, 9		; <i1> [#uses=1] -- logical inverse of %cmp7
+	br i1 %cmp12, label %ifthen13, label %ifend15
+
+ifthen13:		; preds = %ifend10
+	%call14 = call i32 (...) @bar()		; <i32> [#uses=0]
+	br label %ifend15
+
+ifend15:		; preds = %ifthen13, %ifend10
+	ret i32 0
+}
+
+declare i32 @foo(...)
+
+declare i32 @bar(...)
+

Added: llvm/trunk/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt < %s -simplifycfg -disable-output
+; PR3016
+; Dead use caused invariant violation.
+
+define i32 @func_105(i1 %tmp5, i1 %tmp7) nounwind {		; PR3016 reproducer
+BB:
+	br i1 true, label %BB2, label %BB1		; constant condition: the %BB1 edge is never taken
+
+BB1:		; preds = %BB
+	br label %BB2
+
+BB2:		; preds = %BB1, %BB
+	%tmp3 = phi i1 [ true, %BB ], [ false, %BB1 ]		; <i1> [#uses=1]
+	br label %BB9
+
+BB9:		; preds = %BB11, %BB2
+	%tmp10 = phi i32 [ 0, %BB2 ], [ %tmp12, %BB11 ]		; <i32> [#uses=1]
+	br i1 %tmp5, label %BB11, label %BB13
+
+BB11:		; preds = %BB13, %BB9
+	%tmp12 = phi i32 [ 0, %BB13 ], [ %tmp10, %BB9 ]		; <i32> [#uses=2]
+	br i1 %tmp3, label %BB9, label %BB20
+
+BB13:		; preds = %BB13, %BB9
+	%tmp14 = phi i32 [ 0, %BB9 ], [ %tmp14, %BB13 ]		; <i32> [#uses=1] -- its only use is this PHI itself
+	br i1 %tmp7, label %BB13, label %BB11
+
+BB20:		; preds = %BB11
+	ret i32 %tmp12
+}

Added: llvm/trunk/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt < %s -simplifycfg -S | not grep select
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+module asm ".globl _foo"
+module asm "_foo: ret"
+module asm ".globl _i"
+module asm ".set _i, 0"
+@i = extern_weak global i32		; <i32*> [#uses=2]
+@j = common global i32 0		; <i32*> [#uses=1]
+@ed = common global double 0.000000e+00, align 8		; <double*> [#uses=1]
+
+define i32 @main() nounwind ssp {		; the test's run line greps the output to ensure no select is produced
+entry:
+	br label %bb4
+
+bb:		; preds = %bb4
+	br i1 icmp ne (i32* @i, i32* null), label %bb1, label %bb2		; @i is loaded only on the path where its address is non-null
+
+bb1:		; preds = %bb
+	%0 = load i32, i32* @i, align 4		; <i32> [#uses=1]
+	br label %bb3
+
+bb2:		; preds = %bb
+	br label %bb3
+
+bb3:		; preds = %bb2, %bb1
+	%storemerge = phi i32 [ %0, %bb1 ], [ 0, %bb2 ]		; <i32> [#uses=2] -- loaded value, or 0 when the load was skipped
+	store i32 %storemerge, i32* @j
+	%1 = sitofp i32 %storemerge to double		; <double> [#uses=1]
+	%2 = call double @sin(double %1) nounwind readonly		; <double> [#uses=1]
+	%3 = fadd double %2, %d.0		; <double> [#uses=1]
+	%4 = add i32 %l.0, 1		; <i32> [#uses=1]
+	br label %bb4
+
+bb4:		; preds = %bb3, %entry
+	%d.0 = phi double [ undef, %entry ], [ %3, %bb3 ]		; <double> [#uses=2]
+	%l.0 = phi i32 [ 0, %entry ], [ %4, %bb3 ]		; <i32> [#uses=2]
+	%5 = icmp sgt i32 %l.0, 99		; <i1> [#uses=1] -- loop exits once the counter exceeds 99
+	br i1 %5, label %bb5, label %bb
+
+bb5:		; preds = %bb4
+	store double %d.0, double* @ed, align 8
+	ret i32 0
+}
+
+declare double @sin(double) nounwind readonly

Added: llvm/trunk/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt -simplifycfg -disable-output < %s
+; END.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @bar(i32)
+
+define void @foo() personality i32 (...)* @__gxx_personality_v0 {		; one invoke of @bar whose unwind edge leads to a cleanup-only landing pad
+entry:
+ invoke void @bar(i32 undef)
+         to label %r unwind label %u
+
+r:                                                ; preds = %entry
+ ret void
+
+u:                                                ; preds = %entry
+ %val = landingpad { i8*, i32 }
+          cleanup
+ resume { i8*, i32 } %val		; continues unwinding with the landing pad's value
+}
+
+declare i32 @__gxx_personality_v0(...)

Added: llvm/trunk/test/Transforms/SimplifyCFG/2011-03-08-UnreachableUse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/2011-03-08-UnreachableUse.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/2011-03-08-UnreachableUse.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/2011-03-08-UnreachableUse.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+; PR9420
+
+; Note that the crash in PR9420 test is sensitive to the ordering of
+; the transformations done by SimplifyCFG, so this test is likely to rot
+; quickly.
+
+define noalias i8* @func_29() nounwind {
+; CHECK: entry:
+; CHECK-NEXT: unreachable
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc38, %entry
+  %p_34.addr.0 = phi i16 [ 1, %entry ], [ %conv40, %for.inc38 ]
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc29, %for.cond
+  %p_32.addr.0 = phi i1 [ true, %for.cond ], [ true, %for.inc29 ]		; both incoming values are true
+  br i1 %p_32.addr.0, label %for.body8, label %for.inc38
+
+for.body8:                                        ; preds = %for.cond1
+  unreachable
+
+for.inc29:                                        ; No predecessors!
+  br label %for.cond1
+
+for.inc38:                                        ; preds = %for.cond1
+  %conv40 = add i16 %p_34.addr.0, 1
+  br label %for.cond
+}




More information about the llvm-commits mailing list