[llvm] 8adfa29 - [Pipelines] Introduce SROA after (final, run-time) loop unrolling

Thu Nov 17 10:35:40 PST 2022

Author: Roman Lebedev
Date: 2022-11-17T21:31:30+03:00
New Revision: 8adfa29706e5407b62a4726e2172894e0dfdc1e8

URL: https://github.com/llvm/llvm-project/commit/8adfa29706e5407b62a4726e2172894e0dfdc1e8
DIFF: https://github.com/llvm/llvm-project/commit/8adfa29706e5407b62a4726e2172894e0dfdc1e8.diff

LOG: [Pipelines] Introduce SROA after (final, run-time) loop unrolling

Now that we are done with loop unrolling, be it either by LoopVectorizer,
or LoopUnroll passes, some variable-offset GEP's into alloca's could have
become constant-offset, thus enabling SROA and alloca promotion,
yet we don't capitalize on that, which is surprizing.

While it would be good to not introduce one more SROA invocation,
but instead move the one from `PassBuilder::buildFunctionSimplificationPipeline()`,
the existing test coverage says that is a bad idea,
though it would be fine compile-time wise: https://llvm-compile-time-tracker.com/compare.php?from=b150d34c47efbd8fa09604bce805c0920360f8d7&to=5a9a5c855158b482552be8c7af3e73d67fa44805&stat=instructions

So instead, i add yet another SROA run.
I have checked, and it needs to be at least after said final loop unrolling.
This is still fine compile-time wise: https://llvm-compile-time-tracker.com/compare.php?from=70324cd88328c0924e605fa81b696572560aa5c9&to=fb489bbef687ad821c3173a931709f9cad9aee8a&stat=instructions

I've encountered this in a real code, `SROA-after-final-loop-unrolling.ll` has been reduced from https://godbolt.org/z/fsdMhETh3

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D136806

Added: 
    

Modified: 
    clang/test/CodeGen/cleanup-destslot-simple.c
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/test/Other/new-pm-defaults.ll
    llvm/test/Other/new-pm-lto-defaults.ll
    llvm/test/Other/new-pm-thinlto-defaults.ll
    llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
    llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
    llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
    llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
    llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling.ll
    llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/cleanup-destslot-simple.c b/clang/test/CodeGen/cleanup-destslot-simple.c
index 18d4c2138090e..2a0e682b0ab21 100644

--- a/clang/test/CodeGen/cleanup-destslot-simple.c
+++ b/clang/test/CodeGen/cleanup-destslot-simple.c
@@ -17,8 +17,8 @@
 // CHECK-LIFETIME-NEXT:    store i32 3, ptr [[X]], align 4, !dbg [[DBG10:![0-9]+]], !tbaa [[TBAA11:![0-9]+]]
 // CHECK-LIFETIME-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[P]]), !dbg [[DBG15:![0-9]+]]
 // CHECK-LIFETIME-NEXT:    store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16:![0-9]+]], !tbaa [[TBAA17:![0-9]+]]
-// CHECK-LIFETIME-NEXT:    [[P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]]
-// CHECK-LIFETIME-NEXT:    [[TMP0:%.*]] = load i32, ptr [[P_0_P_0_P_0_]], align 4, !dbg [[DBG20:![0-9]+]], !tbaa [[TBAA11]]
+// CHECK-LIFETIME-NEXT:    [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]]
+// CHECK-LIFETIME-NEXT:    [[TMP0:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG20:![0-9]+]], !tbaa [[TBAA11]]
 // CHECK-LIFETIME-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[P]]), !dbg [[DBG21:![0-9]+]]
 // CHECK-LIFETIME-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[X]]) #[[ATTR2]], !dbg [[DBG21]]
 // CHECK-LIFETIME-NEXT:    ret i32 [[TMP0]], !dbg [[DBG22:![0-9]+]]
@@ -49,7 +49,7 @@
 // CHECK-MSAN-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr, !dbg [[DBG15]]
 // CHECK-MSAN-NEXT:    store i64 0, ptr [[TMP5]], align 8, !dbg [[DBG16:![0-9]+]]
 // CHECK-MSAN-NEXT:    store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16]], !tbaa [[TBAA17:![0-9]+]]
-// CHECK-MSAN-NEXT:    [[P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]]
+// CHECK-MSAN-NEXT:    [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]]
 // CHECK-MSAN-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP5]], align 8, !dbg [[DBG19]]
 // CHECK-MSAN-NEXT:    [[_MSCMP_NOT:%.*]] = icmp eq i64 [[_MSLD]], 0, !dbg [[DBG20:![0-9]+]]
 // CHECK-MSAN-NEXT:    br i1 [[_MSCMP_NOT]], label [[TMP7:%.*]], label [[TMP6:%.*]], !dbg [[DBG20]], !prof [[PROF21:![0-9]+]]
@@ -57,8 +57,8 @@
 // CHECK-MSAN-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]], !dbg [[DBG20]]
 // CHECK-MSAN-NEXT:    unreachable, !dbg [[DBG20]]
 // CHECK-MSAN:       7:
-// CHECK-MSAN-NEXT:    [[TMP8:%.*]] = load i32, ptr [[P_0_P_0_P_0_]], align 4, !dbg [[DBG20]], !tbaa [[TBAA11]]
-// CHECK-MSAN-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[P_0_P_0_P_0_]] to i64, !dbg [[DBG20]]
+// CHECK-MSAN-NEXT:    [[TMP8:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG20]], !tbaa [[TBAA11]]
+// CHECK-MSAN-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[P_0_P_0_P_0_P_0_]] to i64, !dbg [[DBG20]]
 // CHECK-MSAN-NEXT:    [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080, !dbg [[DBG20]]
 // CHECK-MSAN-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr, !dbg [[DBG20]]
 // CHECK-MSAN-NEXT:    [[_MSLD1:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG20]]
@@ -84,7 +84,7 @@
 // CHECK-KMSAN-NEXT:    [[TMP4:%.*]] = extractvalue { ptr, ptr } [[TMP3]], 0, !dbg [[DBG16]]
 // CHECK-KMSAN-NEXT:    store i64 0, ptr [[TMP4]], align 8, !dbg [[DBG16]]
 // CHECK-KMSAN-NEXT:    store volatile ptr [[X]], ptr [[P]], align 8, !dbg [[DBG16]], !tbaa [[TBAA17:![0-9]+]]
-// CHECK-KMSAN-NEXT:    [[P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]]
+// CHECK-KMSAN-NEXT:    [[P_0_P_0_P_0_P_0_:%.*]] = load volatile ptr, ptr [[P]], align 8, !dbg [[DBG19:![0-9]+]], !tbaa [[TBAA17]]
 // CHECK-KMSAN-NEXT:    [[TMP5:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr nonnull [[P]]) #[[ATTR2]], !dbg [[DBG19]]
 // CHECK-KMSAN-NEXT:    [[TMP6:%.*]] = extractvalue { ptr, ptr } [[TMP5]], 0, !dbg [[DBG19]]
 // CHECK-KMSAN-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8, !dbg [[DBG19]]
@@ -98,8 +98,8 @@
 // CHECK-KMSAN:       10:
 // CHECK-KMSAN-NEXT:    [[RETVAL_ORIGIN:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i64 0, i32 6
 // CHECK-KMSAN-NEXT:    [[RETVAL_SHADOW:%.*]] = getelementptr { [100 x i64], [100 x i64], [100 x i64], [100 x i64], i64, [200 x i32], i32, i32 }, ptr [[TMP0]], i64 0, i32 1
-// CHECK-KMSAN-NEXT:    [[TMP11:%.*]] = load i32, ptr [[P_0_P_0_P_0_]], align 4, !dbg [[DBG20]], !tbaa [[TBAA11]]
-// CHECK-KMSAN-NEXT:    [[TMP12:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr nonnull [[P_0_P_0_P_0_]]) #[[ATTR2]], !dbg [[DBG20]]
+// CHECK-KMSAN-NEXT:    [[TMP11:%.*]] = load i32, ptr [[P_0_P_0_P_0_P_0_]], align 4, !dbg [[DBG20]], !tbaa [[TBAA11]]
+// CHECK-KMSAN-NEXT:    [[TMP12:%.*]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr nonnull [[P_0_P_0_P_0_P_0_]]) #[[ATTR2]], !dbg [[DBG20]]
 // CHECK-KMSAN-NEXT:    [[TMP13:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 0, !dbg [[DBG20]]
 // CHECK-KMSAN-NEXT:    [[TMP14:%.*]] = extractvalue { ptr, ptr } [[TMP12]], 1, !dbg [[DBG20]]
 // CHECK-KMSAN-NEXT:    [[_MSLD1:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg [[DBG20]]

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index f978b2cefe9e3..d4e73bd8bfb32 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1107,6 +1107,10 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
         Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
         PTO.ForgetAllSCEVInLoopUnroll)));
     FPM.addPass(WarnMissedTransformationsPass());
+    // Now that we are done with loop unrolling, be it either by LoopVectorizer,
+    // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
+    // become constant-offset, thus enabling SROA and alloca promotion. Do so.
+    FPM.addPass(SROAPass());
   }
 
   if (!IsFullLTO) {
@@ -1194,6 +1198,10 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
         Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
         PTO.ForgetAllSCEVInLoopUnroll)));
     FPM.addPass(WarnMissedTransformationsPass());
+    // Now that we are done with loop unrolling, be it either by LoopVectorizer,
+    // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
+    // become constant-offset, thus enabling SROA and alloca promotion. Do so.
+    FPM.addPass(SROAPass());
     FPM.addPass(InstCombinePass());
     FPM.addPass(
         RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());

diff  --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 162663a0aa969..303e164d0cfa0 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -254,6 +254,7 @@
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
+; CHECK-O-NEXT: Running pass: SROAPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index 6432d806ad9c1..bbed7fbf3c07f 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -127,6 +127,7 @@
 ; CHECK-O23SZ-NEXT: Running analysis: DemandedBitsAnalysis on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopUnrollPass on foo
 ; CHECK-O23SZ-NEXT: WarnMissedTransformationsPass on foo
+; CHECK-O23SZ-NEXT: Running pass: SROAPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo
 ; CHECK-O23SZ-NEXT: Running pass: SimplifyCFGPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: SCCPPass on foo

diff  --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index 7ef6ac1d5f944..5b958348be333 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -231,6 +231,7 @@
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass
+; CHECK-POSTLINK-O-NEXT: Running pass: SROAPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index f93cac9276ee8..e67b23d18fbd4 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -175,6 +175,7 @@
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
+; CHECK-O-NEXT: Running pass: SROAPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index 498c9fbf53321..01204ecc065d4 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -187,6 +187,7 @@
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
+; CHECK-O-NEXT: Running pass: SROAPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass

diff  --git a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
index 082ccd8100208..171fe16acb104 100644
--- a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
@@ -38,37 +38,26 @@ cleanup:
 define i32 @main() {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i8*, align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8* @allocate(i32 12)
-; CHECK-NEXT:    store i8* [[TMP1]], i8** [[TMP0]], align 8
-; CHECK-NEXT:    [[N_SPILL_ADDR_I:%.*]] = bitcast i8* [[TMP1]] to i32*
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8* @allocate(i32 12)
+; CHECK-NEXT:    [[N_SPILL_ADDR_I:%.*]] = bitcast i8* [[TMP0]] to i32*
 ; CHECK-NEXT:    store i32 1, i32* [[N_SPILL_ADDR_I]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8** [[TMP0]] to %f.Frame**
-; CHECK-NEXT:    [[N_VAL3_SPILL_ADDR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 4
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[N_VAL3_SPILL_ADDR_I]] to i32*
-; CHECK-NEXT:    store i32 1, i32* [[TMP3]], align 4, !noalias !0
-; CHECK-NEXT:    [[INPUT_SPILL_ADDR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 8
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[INPUT_SPILL_ADDR_I]] to i32*
-; CHECK-NEXT:    store i32 2, i32* [[TMP4]], align 4, !noalias !0
-; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
-; CHECK-NEXT:    [[FRAMEPTR_I1:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !3
-; CHECK-NEXT:    [[INPUT_RELOAD_ADDR13_I:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 2
-; CHECK-NEXT:    [[INPUT_RELOAD14_I:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR13_I]], align 4, !noalias !3
-; CHECK-NEXT:    [[N_VAL3_RELOAD_ADDR11_I:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I1]], i64 0, i32 1
-; CHECK-NEXT:    [[N_VAL3_RELOAD12_I:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR11_I]], align 4, !noalias !3
-; CHECK-NEXT:    [[SUM7_I:%.*]] = add i32 [[N_VAL3_RELOAD12_I]], [[INPUT_RELOAD14_I]]
-; CHECK-NEXT:    store i32 [[SUM7_I]], i32* [[N_VAL3_RELOAD_ADDR11_I]], align 4, !noalias !3
-; CHECK-NEXT:    store i32 4, i32* [[INPUT_RELOAD_ADDR13_I]], align 4, !noalias !3
-; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
-; CHECK-NEXT:    [[FRAMEPTR_I2:%.*]] = load %f.Frame*, %f.Frame** [[TMP2]], align 8, !alias.scope !6
-; CHECK-NEXT:    [[INPUT_RELOAD_ADDR13_I3:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I2]], i64 0, i32 2
-; CHECK-NEXT:    [[INPUT_RELOAD14_I4:%.*]] = load i32, i32* [[INPUT_RELOAD_ADDR13_I3]], align 4, !noalias !6
-; CHECK-NEXT:    [[N_VAL3_RELOAD_ADDR11_I5:%.*]] = getelementptr inbounds [[F_FRAME]], %f.Frame* [[FRAMEPTR_I2]], i64 0, i32 1
-; CHECK-NEXT:    [[N_VAL3_RELOAD12_I6:%.*]] = load i32, i32* [[N_VAL3_RELOAD_ADDR11_I5]], align 4, !noalias !6
-; CHECK-NEXT:    [[SUM7_I7:%.*]] = add i32 [[N_VAL3_RELOAD12_I6]], [[INPUT_RELOAD14_I4]]
+; CHECK-NEXT:    [[N_VAL3_SPILL_ADDR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[N_VAL3_SPILL_ADDR_I]] to i32*
+; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 4, !noalias !0
+; CHECK-NEXT:    [[INPUT_SPILL_ADDR_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 8
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[INPUT_SPILL_ADDR_I]] to i32*
+; CHECK-NEXT:    store i32 2, i32* [[TMP2]], align 4, !noalias !0
+; CHECK-NEXT:    [[INPUT_RELOAD_ADDR13_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 8
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[INPUT_RELOAD_ADDR13_I]] to i32*
+; CHECK-NEXT:    [[N_VAL3_RELOAD_ADDR11_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[N_VAL3_RELOAD_ADDR11_I]] to i32*
+; CHECK-NEXT:    [[N_VAL3_RELOAD12_I:%.*]] = load i32, i32* [[TMP4]], align 4, !noalias !3
+; CHECK-NEXT:    [[SUM7_I:%.*]] = add i32 [[N_VAL3_RELOAD12_I]], 2
+; CHECK-NEXT:    store i32 [[SUM7_I]], i32* [[TMP4]], align 4, !noalias !3
+; CHECK-NEXT:    store i32 4, i32* [[TMP3]], align 4, !noalias !3
+; CHECK-NEXT:    [[SUM7_I7:%.*]] = add i32 [[N_VAL3_RELOAD12_I]], 6
 ; CHECK-NEXT:    tail call void @print(i32 [[SUM7_I7]]), !noalias !6
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %f.Frame* [[FRAMEPTR_I2]] to i8*
-; CHECK-NEXT:    tail call void @deallocate(i8* [[TMP5]]), !noalias !6
+; CHECK-NEXT:    tail call void @deallocate(i8* [[TMP0]]), !noalias !6
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
index 7fbd2750548f7..818af8a596c58 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
@@ -22,43 +22,35 @@ $_ZNSt14__array_traitsIiLm2EE6_S_refERA2_Kim = comdat any
 define dso_local void @foo(i32 noundef %arg, ptr noundef nonnull align 4 dereferenceable(8) %arg1) #0 {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[I3:%.*]] = alloca [[T0:%.*]], align 8
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[I3]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    store i64 180388626456, ptr [[I3]], align 8
 ; CHECK-NEXT:    [[I9:%.*]] = sdiv i32 [[ARG:%.*]], 128
 ; CHECK-NEXT:    [[I10:%.*]] = shl nsw i32 [[I9]], 7
 ; CHECK-NEXT:    [[ARG_OFF:%.*]] = add i32 [[ARG]], 127
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255
-; CHECK-NEXT:    br i1 [[TMP0]], label [[BB12:%.*]], label [[BB13_PREHEADER:%.*]]
-; CHECK:       bb13.preheader:
-; CHECK-NEXT:    [[I5_I_I_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[I3]], i64 0, i64 1
-; CHECK-NEXT:    [[I3_PROMOTED:%.*]] = load i32, ptr [[I3]], align 8, !tbaa [[TBAA5:![0-9]+]]
-; CHECK-NEXT:    [[I5_I_I_1_PROMOTED:%.*]] = load i32, ptr [[I5_I_I_1]], align 4, !tbaa [[TBAA5]]
-; CHECK-NEXT:    br label [[BB13:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[BB12:%.*]], label [[BB13:%.*]]
 ; CHECK:       bb12.loopexit:
-; CHECK-NEXT:    store i32 [[I21_2:%.*]], ptr [[I3]], align 8, !tbaa [[TBAA5]]
-; CHECK-NEXT:    store i32 [[I21_3:%.*]], ptr [[I5_I_I_1]], align 4, !tbaa [[TBAA5]]
-; CHECK-NEXT:    [[DOTPRE:%.*]] = load i64, ptr [[I3]], align 8, !tbaa [[TBAA9:![0-9]+]]
+; CHECK-NEXT:    [[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64
+; CHECK-NEXT:    [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[I3_SROA_8_0_INSERT_EXT]], 32
+; CHECK-NEXT:    [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64
+; CHECK-NEXT:    [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]]
 ; CHECK-NEXT:    br label [[BB12]]
 ; CHECK:       bb12:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ [[DOTPRE]], [[BB12_LOOPEXIT:%.*]] ], [ 180388626456, [[BB:%.*]] ]
-; CHECK-NEXT:    store i64 [[TMP1]], ptr [[ARG1:%.*]], align 4, !tbaa [[TBAA9]]
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[I3]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], [[BB12_LOOPEXIT:%.*]] ], [ 180388626456, [[BB:%.*]] ]
+; CHECK-NEXT:    store i64 [[TMP1]], ptr [[ARG1:%.*]], align 4, !tbaa [[TBAA5:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb13:
-; CHECK-NEXT:    [[I21_113:%.*]] = phi i32 [ [[I5_I_I_1_PROMOTED]], [[BB13_PREHEADER]] ], [ [[I21_3]], [[BB13]] ]
-; CHECK-NEXT:    [[I20_212:%.*]] = phi i32 [ [[I3_PROMOTED]], [[BB13_PREHEADER]] ], [ [[I21_2]], [[BB13]] ]
-; CHECK-NEXT:    [[I4_05:%.*]] = phi i32 [ 0, [[BB13_PREHEADER]] ], [ [[I24_3:%.*]], [[BB13]] ]
-; CHECK-NEXT:    [[I21:%.*]] = mul nsw i32 [[I20_212]], [[I4_05]]
+; CHECK-NEXT:    [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], [[BB13]] ], [ 42, [[BB]] ]
+; CHECK-NEXT:    [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], [[BB13]] ], [ 24, [[BB]] ]
+; CHECK-NEXT:    [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], [[BB13]] ], [ 0, [[BB]] ]
+; CHECK-NEXT:    [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]]
 ; CHECK-NEXT:    [[I24:%.*]] = or i32 [[I4_05]], 1
-; CHECK-NEXT:    [[I21_1:%.*]] = mul nsw i32 [[I21_113]], [[I24]]
+; CHECK-NEXT:    [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]]
 ; CHECK-NEXT:    [[I24_1:%.*]] = or i32 [[I4_05]], 2
 ; CHECK-NEXT:    [[I21_2]] = mul nsw i32 [[I21]], [[I24_1]]
 ; CHECK-NEXT:    [[I24_2:%.*]] = or i32 [[I4_05]], 3
 ; CHECK-NEXT:    [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]]
 ; CHECK-NEXT:    [[I24_3]] = add nuw nsw i32 [[I4_05]], 4
 ; CHECK-NEXT:    [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]]
-; CHECK-NEXT:    br i1 [[I11_NOT_3]], label [[BB12_LOOPEXIT]], label [[BB13]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    br i1 [[I11_NOT_3]], label [[BB12_LOOPEXIT]], label [[BB13]], !llvm.loop [[LOOP8:![0-9]+]]
 ;
 bb:
   %i = alloca i32, align 4

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling.ll
index 35c867d26d160..df73ceea94ea9 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling.ll
@@ -13,38 +13,10 @@ target triple = "x86_64-pc-linux-gnu"
 define void @wibble(ptr %arg) personality ptr null {
 ; CHECK-LABEL: @wibble(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[I1:%.*]] = alloca [[T1:%.*]], align 16
 ; CHECK-NEXT:    [[I10_3_I_PRE:%.*]] = load i8, ptr [[ARG:%.*]], align 1
-; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i8> poison, i8 [[I10_3_I_PRE]], i64 3
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [64 x i8], ptr [[ARG]], i64 0, i64 1
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[VECTOR_RECUR_INIT]], <4 x i8> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[I1]], align 16
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [16 x i32], ptr [[I1]], i64 0, i64 4
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr [64 x i8], ptr [[ARG]], i64 0, i64 5
-; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> [[WIDE_LOAD_1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP7:%.*]] = or <4 x i8> [[TMP6]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP8:%.*]] = zext <4 x i8> [[TMP7]] to <4 x i32>
-; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr [[TMP4]], align 16
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [16 x i32], ptr [[I1]], i64 0, i64 8
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr [64 x i8], ptr [[ARG]], i64 0, i64 9
-; CHECK-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD_1]], <4 x i8> [[WIDE_LOAD_2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP12:%.*]] = or <4 x i8> [[TMP11]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP13:%.*]] = zext <4 x i8> [[TMP12]] to <4 x i32>
-; CHECK-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP9]], align 16
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [16 x i32], ptr [[I1]], i64 0, i64 12
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr [64 x i8], ptr [[ARG]], i64 0, i64 13
-; CHECK-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD_2]], <4 x i8> [[WIDE_LOAD_3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP17:%.*]] = or <4 x i8> [[TMP16]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP18:%.*]] = zext <4 x i8> [[TMP17]] to <4 x i32>
-; CHECK-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP14]], align 16
-; CHECK-NEXT:    [[I3_I_I:%.*]] = load i32, ptr [[I1]], align 16
-; CHECK-NEXT:    [[I4_I_I:%.*]] = add i32 [[I3_I_I]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = or i8 [[I10_3_I_PRE]], 1
+; CHECK-NEXT:    [[I1_SROA_0_0_VEC_EXTRACT:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[I4_I_I:%.*]] = add nuw nsw i32 [[I1_SROA_0_0_VEC_EXTRACT]], 1
 ; CHECK-NEXT:    store i32 [[I4_I_I]], ptr [[ARG]], align 4
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
index 127139d24fac4..5a2c4b90ce00c 100644
--- a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
@@ -13,17 +13,17 @@ define i16 @helper(i16 %0, i64 %x) {
 ; CHECK-NEXT:    store i16 [[TMP0:%.*]], ptr [[DATA]], align 2
 ; CHECK-NEXT:    br label [[BB6_I_I:%.*]]
 ; CHECK:       bb6.i.i:
-; CHECK-NEXT:    [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ]
+; CHECK-NEXT:    [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP1:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ]
 ; CHECK-NEXT:    [[_40_I_I:%.*]] = sub nsw i64 0, [[ITER_SROA_0_07_I_I]]
-; CHECK-NEXT:    [[TMP2]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1
+; CHECK-NEXT:    [[TMP1]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1
 ; CHECK-NEXT:    [[_34_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [0 x i8], ptr [[DATA]], i64 0, i64 [[_40_I_I]]
-; CHECK-NEXT:    [[_39_I_I:%.*]] = getelementptr i8, ptr [[TMP1:%.*]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [0 x i8], ptr [[DATA]], i64 0, i64 [[_40_I_I]]
+; CHECK-NEXT:    [[_39_I_I:%.*]] = getelementptr i8, ptr [[TMP2]], i64 1
 ; CHECK-NEXT:    [[TMP_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_34_I_I]], align 1
 ; CHECK-NEXT:    [[TMP2_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_39_I_I]], align 1
 ; CHECK-NEXT:    store i8 [[TMP2_0_COPYLOAD_I_I_I_I]], ptr [[_34_I_I]], align 1
 ; CHECK-NEXT:    store i8 [[TMP_0_COPYLOAD_I_I_I_I]], ptr [[_39_I_I]], align 1
-; CHECK-NEXT:    [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT:    [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT_I_I]], label [[EXIT:%.*]], label [[BB6_I_I]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[DOTSROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[DATA]], align 2