[llvm] 83974a4 - Revert "[LoopUnroll] Clamp PartialThreshold for large LoopMicroOpBufferSize (#67657)"

Dmitri Gribenko via llvm-commits llvm-commits at lists.llvm.org
Thu May 16 03:14:29 PDT 2024


Author: Dmitri Gribenko
Date: 2024-05-16T12:11:42+02:00
New Revision: 83974a4b92d1fd33b8e21d7a868862893d9430e9

URL: https://github.com/llvm/llvm-project/commit/83974a4b92d1fd33b8e21d7a868862893d9430e9
DIFF: https://github.com/llvm/llvm-project/commit/83974a4b92d1fd33b8e21d7a868862893d9430e9.diff

LOG: Revert "[LoopUnroll] Clamp PartialThreshold for large LoopMicroOpBufferSize (#67657)"

This reverts commit f0b3654701bde1cf7821d60698b42383edaff9f3.

The reverted commit triggers UB by reading an uninitialized variable.

`UP.PartialThreshold` is used uninitialized in `getUnrollingPreferences()` when
it is called from `LoopVectorizationPlanner::executePlan()`. In this case the
`UP` variable is created on the stack and its fields are not initialized.

```
==8802==WARNING: MemorySanitizer: use-of-uninitialized-value
    #0 0x557c0b081b99 in llvm::BasicTTIImplBase<llvm::X86TTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&, llvm::OptimizationRemarkEmitter*) llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h
    #1 0x557c0b07a40c in llvm::TargetTransformInfo::Model<llvm::X86TTIImpl>::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&, llvm::OptimizationRemarkEmitter*) llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h:2277:17
    #2 0x557c0f5d69ee in llvm::TargetTransformInfo::getUnrollingPreferences(llvm::Loop*, llvm::ScalarEvolution&, llvm::TargetTransformInfo::UnrollingPreferences&, llvm::OptimizationRemarkEmitter*) const llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp:387:19
    #3 0x557c0e6b96a0 in llvm::LoopVectorizationPlanner::executePlan(llvm::ElementCount, unsigned int, llvm::VPlan&, llvm::InnerLoopVectorizer&, llvm::DominatorTree*, bool, llvm::DenseMap<llvm::SCEV const*, llvm::Value*, llvm::DenseMapInfo<llvm::SCEV const*, void>, llvm::detail::DenseMapPair<llvm::SCEV const*, llvm::Value*>> const*) llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp:7624:7
    #4 0x557c0e6e4b63 in llvm::LoopVectorizePass::processLoop(llvm::Loop*) llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp:10253:13
    #5 0x557c0e6f2429 in llvm::LoopVectorizePass::runImpl(llvm::Function&, llvm::ScalarEvolution&, llvm::LoopInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::BlockFrequencyInfo*, llvm::TargetLibraryInfo*, llvm::DemandedBits&, llvm::AssumptionCache&, llvm::LoopAccessInfoManager&, llvm::OptimizationRemarkEmitter&, llvm::ProfileSummaryInfo*) llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp:10344:30
    #6 0x557c0e6f2f97 in llvm::LoopVectorizePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp:10383:9

[...]

  Uninitialized value was created by an allocation of 'UP' in the stack frame
    #0 0x557c0e6b961e in llvm::LoopVectorizationPlanner::executePlan(llvm::ElementCount, unsigned int, llvm::VPlan&, llvm::InnerLoopVectorizer&, llvm::DominatorTree*, bool, llvm::DenseMap<llvm::SCEV const*, llvm::Value*, llvm::DenseMapInfo<llvm::SCEV const*, void>, llvm::detail::DenseMapPair<llvm::SCEV const*, llvm::Value*>> const*) llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp:7623:3
```

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/test/Transforms/LoopUnroll/X86/znver3.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 8dba6a6412852..2091432d4fe27 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -612,13 +612,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     if (PartialUnrollingThreshold.getNumOccurrences() > 0)
       MaxOps = PartialUnrollingThreshold;
     else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
-      // Upper bound by the default PartialThreshold, which is the same as
-      // the default full-unroll Threshold. Even if the loop micro-op buffer
-      // is very large, this does not mean that we want to unroll all loops
-      // to that length, as it would increase code size beyond the limits of
-      // what unrolling normally allows.
-      MaxOps = std::min(ST->getSchedModel().LoopMicroOpBufferSize,
-                        UP.PartialThreshold);
+      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
     else
       return;
 

diff  --git a/llvm/test/Transforms/LoopUnroll/X86/znver3.ll b/llvm/test/Transforms/LoopUnroll/X86/znver3.ll
index 467c57906d888..30389062a0967 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/znver3.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/znver3.ll
@@ -9,8 +9,8 @@ define i32 @test(ptr %ary) "target-cpu"="znver3" {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_31:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_31:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_127:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_127:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[SUM_NEXT:%.*]] = add nsw i32 [[VAL]], [[SUM]]
@@ -137,12 +137,396 @@ define i32 @test(ptr %ary) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_30:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 31
 ; CHECK-NEXT:    [[ARRAYIDX_31:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_30]]
 ; CHECK-NEXT:    [[VAL_31:%.*]] = load i32, ptr [[ARRAYIDX_31]], align 4
-; CHECK-NEXT:    [[SUM_NEXT_31]] = add nsw i32 [[VAL_31]], [[SUM_NEXT_30]]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_31]] = add nuw nsw i64 [[INDVARS_IV]], 32
-; CHECK-NEXT:    [[EXITCOND_NOT_31:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_31]], 8192
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT_31]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    [[SUM_NEXT_31:%.*]] = add nsw i32 [[VAL_31]], [[SUM_NEXT_30]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_31:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 32
+; CHECK-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_31]]
+; CHECK-NEXT:    [[VAL_32:%.*]] = load i32, ptr [[ARRAYIDX_32]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_32:%.*]] = add nsw i32 [[VAL_32]], [[SUM_NEXT_31]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_32:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 33
+; CHECK-NEXT:    [[ARRAYIDX_33:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_32]]
+; CHECK-NEXT:    [[VAL_33:%.*]] = load i32, ptr [[ARRAYIDX_33]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_33:%.*]] = add nsw i32 [[VAL_33]], [[SUM_NEXT_32]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_33:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 34
+; CHECK-NEXT:    [[ARRAYIDX_34:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_33]]
+; CHECK-NEXT:    [[VAL_34:%.*]] = load i32, ptr [[ARRAYIDX_34]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_34:%.*]] = add nsw i32 [[VAL_34]], [[SUM_NEXT_33]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_34:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 35
+; CHECK-NEXT:    [[ARRAYIDX_35:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_34]]
+; CHECK-NEXT:    [[VAL_35:%.*]] = load i32, ptr [[ARRAYIDX_35]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_35:%.*]] = add nsw i32 [[VAL_35]], [[SUM_NEXT_34]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_35:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 36
+; CHECK-NEXT:    [[ARRAYIDX_36:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_35]]
+; CHECK-NEXT:    [[VAL_36:%.*]] = load i32, ptr [[ARRAYIDX_36]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_36:%.*]] = add nsw i32 [[VAL_36]], [[SUM_NEXT_35]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_36:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 37
+; CHECK-NEXT:    [[ARRAYIDX_37:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_36]]
+; CHECK-NEXT:    [[VAL_37:%.*]] = load i32, ptr [[ARRAYIDX_37]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_37:%.*]] = add nsw i32 [[VAL_37]], [[SUM_NEXT_36]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_37:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 38
+; CHECK-NEXT:    [[ARRAYIDX_38:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_37]]
+; CHECK-NEXT:    [[VAL_38:%.*]] = load i32, ptr [[ARRAYIDX_38]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_38:%.*]] = add nsw i32 [[VAL_38]], [[SUM_NEXT_37]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_38:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 39
+; CHECK-NEXT:    [[ARRAYIDX_39:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_38]]
+; CHECK-NEXT:    [[VAL_39:%.*]] = load i32, ptr [[ARRAYIDX_39]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_39:%.*]] = add nsw i32 [[VAL_39]], [[SUM_NEXT_38]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_39:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 40
+; CHECK-NEXT:    [[ARRAYIDX_40:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_39]]
+; CHECK-NEXT:    [[VAL_40:%.*]] = load i32, ptr [[ARRAYIDX_40]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_40:%.*]] = add nsw i32 [[VAL_40]], [[SUM_NEXT_39]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_40:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 41
+; CHECK-NEXT:    [[ARRAYIDX_41:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_40]]
+; CHECK-NEXT:    [[VAL_41:%.*]] = load i32, ptr [[ARRAYIDX_41]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_41:%.*]] = add nsw i32 [[VAL_41]], [[SUM_NEXT_40]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_41:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 42
+; CHECK-NEXT:    [[ARRAYIDX_42:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_41]]
+; CHECK-NEXT:    [[VAL_42:%.*]] = load i32, ptr [[ARRAYIDX_42]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_42:%.*]] = add nsw i32 [[VAL_42]], [[SUM_NEXT_41]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_42:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 43
+; CHECK-NEXT:    [[ARRAYIDX_43:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_42]]
+; CHECK-NEXT:    [[VAL_43:%.*]] = load i32, ptr [[ARRAYIDX_43]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_43:%.*]] = add nsw i32 [[VAL_43]], [[SUM_NEXT_42]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_43:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 44
+; CHECK-NEXT:    [[ARRAYIDX_44:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_43]]
+; CHECK-NEXT:    [[VAL_44:%.*]] = load i32, ptr [[ARRAYIDX_44]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_44:%.*]] = add nsw i32 [[VAL_44]], [[SUM_NEXT_43]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_44:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 45
+; CHECK-NEXT:    [[ARRAYIDX_45:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_44]]
+; CHECK-NEXT:    [[VAL_45:%.*]] = load i32, ptr [[ARRAYIDX_45]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_45:%.*]] = add nsw i32 [[VAL_45]], [[SUM_NEXT_44]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_45:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 46
+; CHECK-NEXT:    [[ARRAYIDX_46:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_45]]
+; CHECK-NEXT:    [[VAL_46:%.*]] = load i32, ptr [[ARRAYIDX_46]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_46:%.*]] = add nsw i32 [[VAL_46]], [[SUM_NEXT_45]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_46:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 47
+; CHECK-NEXT:    [[ARRAYIDX_47:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_46]]
+; CHECK-NEXT:    [[VAL_47:%.*]] = load i32, ptr [[ARRAYIDX_47]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_47:%.*]] = add nsw i32 [[VAL_47]], [[SUM_NEXT_46]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_47:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 48
+; CHECK-NEXT:    [[ARRAYIDX_48:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_47]]
+; CHECK-NEXT:    [[VAL_48:%.*]] = load i32, ptr [[ARRAYIDX_48]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_48:%.*]] = add nsw i32 [[VAL_48]], [[SUM_NEXT_47]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_48:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 49
+; CHECK-NEXT:    [[ARRAYIDX_49:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_48]]
+; CHECK-NEXT:    [[VAL_49:%.*]] = load i32, ptr [[ARRAYIDX_49]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_49:%.*]] = add nsw i32 [[VAL_49]], [[SUM_NEXT_48]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_49:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 50
+; CHECK-NEXT:    [[ARRAYIDX_50:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_49]]
+; CHECK-NEXT:    [[VAL_50:%.*]] = load i32, ptr [[ARRAYIDX_50]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_50:%.*]] = add nsw i32 [[VAL_50]], [[SUM_NEXT_49]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_50:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 51
+; CHECK-NEXT:    [[ARRAYIDX_51:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_50]]
+; CHECK-NEXT:    [[VAL_51:%.*]] = load i32, ptr [[ARRAYIDX_51]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_51:%.*]] = add nsw i32 [[VAL_51]], [[SUM_NEXT_50]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_51:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 52
+; CHECK-NEXT:    [[ARRAYIDX_52:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_51]]
+; CHECK-NEXT:    [[VAL_52:%.*]] = load i32, ptr [[ARRAYIDX_52]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_52:%.*]] = add nsw i32 [[VAL_52]], [[SUM_NEXT_51]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_52:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 53
+; CHECK-NEXT:    [[ARRAYIDX_53:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_52]]
+; CHECK-NEXT:    [[VAL_53:%.*]] = load i32, ptr [[ARRAYIDX_53]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_53:%.*]] = add nsw i32 [[VAL_53]], [[SUM_NEXT_52]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_53:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 54
+; CHECK-NEXT:    [[ARRAYIDX_54:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_53]]
+; CHECK-NEXT:    [[VAL_54:%.*]] = load i32, ptr [[ARRAYIDX_54]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_54:%.*]] = add nsw i32 [[VAL_54]], [[SUM_NEXT_53]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_54:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 55
+; CHECK-NEXT:    [[ARRAYIDX_55:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_54]]
+; CHECK-NEXT:    [[VAL_55:%.*]] = load i32, ptr [[ARRAYIDX_55]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_55:%.*]] = add nsw i32 [[VAL_55]], [[SUM_NEXT_54]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_55:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 56
+; CHECK-NEXT:    [[ARRAYIDX_56:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_55]]
+; CHECK-NEXT:    [[VAL_56:%.*]] = load i32, ptr [[ARRAYIDX_56]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_56:%.*]] = add nsw i32 [[VAL_56]], [[SUM_NEXT_55]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_56:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 57
+; CHECK-NEXT:    [[ARRAYIDX_57:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_56]]
+; CHECK-NEXT:    [[VAL_57:%.*]] = load i32, ptr [[ARRAYIDX_57]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_57:%.*]] = add nsw i32 [[VAL_57]], [[SUM_NEXT_56]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_57:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 58
+; CHECK-NEXT:    [[ARRAYIDX_58:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_57]]
+; CHECK-NEXT:    [[VAL_58:%.*]] = load i32, ptr [[ARRAYIDX_58]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_58:%.*]] = add nsw i32 [[VAL_58]], [[SUM_NEXT_57]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_58:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 59
+; CHECK-NEXT:    [[ARRAYIDX_59:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_58]]
+; CHECK-NEXT:    [[VAL_59:%.*]] = load i32, ptr [[ARRAYIDX_59]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_59:%.*]] = add nsw i32 [[VAL_59]], [[SUM_NEXT_58]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_59:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 60
+; CHECK-NEXT:    [[ARRAYIDX_60:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_59]]
+; CHECK-NEXT:    [[VAL_60:%.*]] = load i32, ptr [[ARRAYIDX_60]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_60:%.*]] = add nsw i32 [[VAL_60]], [[SUM_NEXT_59]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_60:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 61
+; CHECK-NEXT:    [[ARRAYIDX_61:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_60]]
+; CHECK-NEXT:    [[VAL_61:%.*]] = load i32, ptr [[ARRAYIDX_61]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_61:%.*]] = add nsw i32 [[VAL_61]], [[SUM_NEXT_60]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_61:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 62
+; CHECK-NEXT:    [[ARRAYIDX_62:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_61]]
+; CHECK-NEXT:    [[VAL_62:%.*]] = load i32, ptr [[ARRAYIDX_62]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_62:%.*]] = add nsw i32 [[VAL_62]], [[SUM_NEXT_61]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_62:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 63
+; CHECK-NEXT:    [[ARRAYIDX_63:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_62]]
+; CHECK-NEXT:    [[VAL_63:%.*]] = load i32, ptr [[ARRAYIDX_63]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_63:%.*]] = add nsw i32 [[VAL_63]], [[SUM_NEXT_62]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_63:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 64
+; CHECK-NEXT:    [[ARRAYIDX_64:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_63]]
+; CHECK-NEXT:    [[VAL_64:%.*]] = load i32, ptr [[ARRAYIDX_64]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_64:%.*]] = add nsw i32 [[VAL_64]], [[SUM_NEXT_63]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_64:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 65
+; CHECK-NEXT:    [[ARRAYIDX_65:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_64]]
+; CHECK-NEXT:    [[VAL_65:%.*]] = load i32, ptr [[ARRAYIDX_65]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_65:%.*]] = add nsw i32 [[VAL_65]], [[SUM_NEXT_64]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_65:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 66
+; CHECK-NEXT:    [[ARRAYIDX_66:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_65]]
+; CHECK-NEXT:    [[VAL_66:%.*]] = load i32, ptr [[ARRAYIDX_66]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_66:%.*]] = add nsw i32 [[VAL_66]], [[SUM_NEXT_65]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_66:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 67
+; CHECK-NEXT:    [[ARRAYIDX_67:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_66]]
+; CHECK-NEXT:    [[VAL_67:%.*]] = load i32, ptr [[ARRAYIDX_67]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_67:%.*]] = add nsw i32 [[VAL_67]], [[SUM_NEXT_66]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_67:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 68
+; CHECK-NEXT:    [[ARRAYIDX_68:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_67]]
+; CHECK-NEXT:    [[VAL_68:%.*]] = load i32, ptr [[ARRAYIDX_68]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_68:%.*]] = add nsw i32 [[VAL_68]], [[SUM_NEXT_67]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_68:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 69
+; CHECK-NEXT:    [[ARRAYIDX_69:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_68]]
+; CHECK-NEXT:    [[VAL_69:%.*]] = load i32, ptr [[ARRAYIDX_69]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_69:%.*]] = add nsw i32 [[VAL_69]], [[SUM_NEXT_68]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_69:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 70
+; CHECK-NEXT:    [[ARRAYIDX_70:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_69]]
+; CHECK-NEXT:    [[VAL_70:%.*]] = load i32, ptr [[ARRAYIDX_70]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_70:%.*]] = add nsw i32 [[VAL_70]], [[SUM_NEXT_69]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_70:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 71
+; CHECK-NEXT:    [[ARRAYIDX_71:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_70]]
+; CHECK-NEXT:    [[VAL_71:%.*]] = load i32, ptr [[ARRAYIDX_71]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_71:%.*]] = add nsw i32 [[VAL_71]], [[SUM_NEXT_70]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_71:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 72
+; CHECK-NEXT:    [[ARRAYIDX_72:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_71]]
+; CHECK-NEXT:    [[VAL_72:%.*]] = load i32, ptr [[ARRAYIDX_72]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_72:%.*]] = add nsw i32 [[VAL_72]], [[SUM_NEXT_71]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_72:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 73
+; CHECK-NEXT:    [[ARRAYIDX_73:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_72]]
+; CHECK-NEXT:    [[VAL_73:%.*]] = load i32, ptr [[ARRAYIDX_73]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_73:%.*]] = add nsw i32 [[VAL_73]], [[SUM_NEXT_72]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_73:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 74
+; CHECK-NEXT:    [[ARRAYIDX_74:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_73]]
+; CHECK-NEXT:    [[VAL_74:%.*]] = load i32, ptr [[ARRAYIDX_74]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_74:%.*]] = add nsw i32 [[VAL_74]], [[SUM_NEXT_73]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_74:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 75
+; CHECK-NEXT:    [[ARRAYIDX_75:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_74]]
+; CHECK-NEXT:    [[VAL_75:%.*]] = load i32, ptr [[ARRAYIDX_75]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_75:%.*]] = add nsw i32 [[VAL_75]], [[SUM_NEXT_74]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_75:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 76
+; CHECK-NEXT:    [[ARRAYIDX_76:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_75]]
+; CHECK-NEXT:    [[VAL_76:%.*]] = load i32, ptr [[ARRAYIDX_76]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_76:%.*]] = add nsw i32 [[VAL_76]], [[SUM_NEXT_75]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_76:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 77
+; CHECK-NEXT:    [[ARRAYIDX_77:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_76]]
+; CHECK-NEXT:    [[VAL_77:%.*]] = load i32, ptr [[ARRAYIDX_77]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_77:%.*]] = add nsw i32 [[VAL_77]], [[SUM_NEXT_76]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_77:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 78
+; CHECK-NEXT:    [[ARRAYIDX_78:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_77]]
+; CHECK-NEXT:    [[VAL_78:%.*]] = load i32, ptr [[ARRAYIDX_78]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_78:%.*]] = add nsw i32 [[VAL_78]], [[SUM_NEXT_77]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_78:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 79
+; CHECK-NEXT:    [[ARRAYIDX_79:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_78]]
+; CHECK-NEXT:    [[VAL_79:%.*]] = load i32, ptr [[ARRAYIDX_79]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_79:%.*]] = add nsw i32 [[VAL_79]], [[SUM_NEXT_78]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_79:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 80
+; CHECK-NEXT:    [[ARRAYIDX_80:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_79]]
+; CHECK-NEXT:    [[VAL_80:%.*]] = load i32, ptr [[ARRAYIDX_80]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_80:%.*]] = add nsw i32 [[VAL_80]], [[SUM_NEXT_79]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_80:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 81
+; CHECK-NEXT:    [[ARRAYIDX_81:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_80]]
+; CHECK-NEXT:    [[VAL_81:%.*]] = load i32, ptr [[ARRAYIDX_81]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_81:%.*]] = add nsw i32 [[VAL_81]], [[SUM_NEXT_80]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_81:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 82
+; CHECK-NEXT:    [[ARRAYIDX_82:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_81]]
+; CHECK-NEXT:    [[VAL_82:%.*]] = load i32, ptr [[ARRAYIDX_82]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_82:%.*]] = add nsw i32 [[VAL_82]], [[SUM_NEXT_81]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_82:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 83
+; CHECK-NEXT:    [[ARRAYIDX_83:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_82]]
+; CHECK-NEXT:    [[VAL_83:%.*]] = load i32, ptr [[ARRAYIDX_83]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_83:%.*]] = add nsw i32 [[VAL_83]], [[SUM_NEXT_82]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_83:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 84
+; CHECK-NEXT:    [[ARRAYIDX_84:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_83]]
+; CHECK-NEXT:    [[VAL_84:%.*]] = load i32, ptr [[ARRAYIDX_84]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_84:%.*]] = add nsw i32 [[VAL_84]], [[SUM_NEXT_83]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_84:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 85
+; CHECK-NEXT:    [[ARRAYIDX_85:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_84]]
+; CHECK-NEXT:    [[VAL_85:%.*]] = load i32, ptr [[ARRAYIDX_85]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_85:%.*]] = add nsw i32 [[VAL_85]], [[SUM_NEXT_84]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_85:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 86
+; CHECK-NEXT:    [[ARRAYIDX_86:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_85]]
+; CHECK-NEXT:    [[VAL_86:%.*]] = load i32, ptr [[ARRAYIDX_86]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_86:%.*]] = add nsw i32 [[VAL_86]], [[SUM_NEXT_85]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_86:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 87
+; CHECK-NEXT:    [[ARRAYIDX_87:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_86]]
+; CHECK-NEXT:    [[VAL_87:%.*]] = load i32, ptr [[ARRAYIDX_87]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_87:%.*]] = add nsw i32 [[VAL_87]], [[SUM_NEXT_86]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_87:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 88
+; CHECK-NEXT:    [[ARRAYIDX_88:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_87]]
+; CHECK-NEXT:    [[VAL_88:%.*]] = load i32, ptr [[ARRAYIDX_88]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_88:%.*]] = add nsw i32 [[VAL_88]], [[SUM_NEXT_87]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_88:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 89
+; CHECK-NEXT:    [[ARRAYIDX_89:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_88]]
+; CHECK-NEXT:    [[VAL_89:%.*]] = load i32, ptr [[ARRAYIDX_89]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_89:%.*]] = add nsw i32 [[VAL_89]], [[SUM_NEXT_88]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_89:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 90
+; CHECK-NEXT:    [[ARRAYIDX_90:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_89]]
+; CHECK-NEXT:    [[VAL_90:%.*]] = load i32, ptr [[ARRAYIDX_90]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_90:%.*]] = add nsw i32 [[VAL_90]], [[SUM_NEXT_89]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_90:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 91
+; CHECK-NEXT:    [[ARRAYIDX_91:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_90]]
+; CHECK-NEXT:    [[VAL_91:%.*]] = load i32, ptr [[ARRAYIDX_91]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_91:%.*]] = add nsw i32 [[VAL_91]], [[SUM_NEXT_90]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_91:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 92
+; CHECK-NEXT:    [[ARRAYIDX_92:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_91]]
+; CHECK-NEXT:    [[VAL_92:%.*]] = load i32, ptr [[ARRAYIDX_92]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_92:%.*]] = add nsw i32 [[VAL_92]], [[SUM_NEXT_91]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_92:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 93
+; CHECK-NEXT:    [[ARRAYIDX_93:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_92]]
+; CHECK-NEXT:    [[VAL_93:%.*]] = load i32, ptr [[ARRAYIDX_93]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_93:%.*]] = add nsw i32 [[VAL_93]], [[SUM_NEXT_92]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_93:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 94
+; CHECK-NEXT:    [[ARRAYIDX_94:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_93]]
+; CHECK-NEXT:    [[VAL_94:%.*]] = load i32, ptr [[ARRAYIDX_94]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_94:%.*]] = add nsw i32 [[VAL_94]], [[SUM_NEXT_93]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_94:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 95
+; CHECK-NEXT:    [[ARRAYIDX_95:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_94]]
+; CHECK-NEXT:    [[VAL_95:%.*]] = load i32, ptr [[ARRAYIDX_95]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_95:%.*]] = add nsw i32 [[VAL_95]], [[SUM_NEXT_94]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_95:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 96
+; CHECK-NEXT:    [[ARRAYIDX_96:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_95]]
+; CHECK-NEXT:    [[VAL_96:%.*]] = load i32, ptr [[ARRAYIDX_96]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_96:%.*]] = add nsw i32 [[VAL_96]], [[SUM_NEXT_95]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_96:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 97
+; CHECK-NEXT:    [[ARRAYIDX_97:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_96]]
+; CHECK-NEXT:    [[VAL_97:%.*]] = load i32, ptr [[ARRAYIDX_97]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_97:%.*]] = add nsw i32 [[VAL_97]], [[SUM_NEXT_96]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_97:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 98
+; CHECK-NEXT:    [[ARRAYIDX_98:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_97]]
+; CHECK-NEXT:    [[VAL_98:%.*]] = load i32, ptr [[ARRAYIDX_98]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_98:%.*]] = add nsw i32 [[VAL_98]], [[SUM_NEXT_97]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_98:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 99
+; CHECK-NEXT:    [[ARRAYIDX_99:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_98]]
+; CHECK-NEXT:    [[VAL_99:%.*]] = load i32, ptr [[ARRAYIDX_99]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_99:%.*]] = add nsw i32 [[VAL_99]], [[SUM_NEXT_98]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_99:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 100
+; CHECK-NEXT:    [[ARRAYIDX_100:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_99]]
+; CHECK-NEXT:    [[VAL_100:%.*]] = load i32, ptr [[ARRAYIDX_100]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_100:%.*]] = add nsw i32 [[VAL_100]], [[SUM_NEXT_99]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_100:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 101
+; CHECK-NEXT:    [[ARRAYIDX_101:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_100]]
+; CHECK-NEXT:    [[VAL_101:%.*]] = load i32, ptr [[ARRAYIDX_101]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_101:%.*]] = add nsw i32 [[VAL_101]], [[SUM_NEXT_100]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_101:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 102
+; CHECK-NEXT:    [[ARRAYIDX_102:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_101]]
+; CHECK-NEXT:    [[VAL_102:%.*]] = load i32, ptr [[ARRAYIDX_102]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_102:%.*]] = add nsw i32 [[VAL_102]], [[SUM_NEXT_101]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_102:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 103
+; CHECK-NEXT:    [[ARRAYIDX_103:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_102]]
+; CHECK-NEXT:    [[VAL_103:%.*]] = load i32, ptr [[ARRAYIDX_103]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_103:%.*]] = add nsw i32 [[VAL_103]], [[SUM_NEXT_102]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_103:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 104
+; CHECK-NEXT:    [[ARRAYIDX_104:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_103]]
+; CHECK-NEXT:    [[VAL_104:%.*]] = load i32, ptr [[ARRAYIDX_104]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_104:%.*]] = add nsw i32 [[VAL_104]], [[SUM_NEXT_103]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_104:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 105
+; CHECK-NEXT:    [[ARRAYIDX_105:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_104]]
+; CHECK-NEXT:    [[VAL_105:%.*]] = load i32, ptr [[ARRAYIDX_105]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_105:%.*]] = add nsw i32 [[VAL_105]], [[SUM_NEXT_104]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_105:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 106
+; CHECK-NEXT:    [[ARRAYIDX_106:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_105]]
+; CHECK-NEXT:    [[VAL_106:%.*]] = load i32, ptr [[ARRAYIDX_106]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_106:%.*]] = add nsw i32 [[VAL_106]], [[SUM_NEXT_105]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_106:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 107
+; CHECK-NEXT:    [[ARRAYIDX_107:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_106]]
+; CHECK-NEXT:    [[VAL_107:%.*]] = load i32, ptr [[ARRAYIDX_107]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_107:%.*]] = add nsw i32 [[VAL_107]], [[SUM_NEXT_106]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_107:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 108
+; CHECK-NEXT:    [[ARRAYIDX_108:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_107]]
+; CHECK-NEXT:    [[VAL_108:%.*]] = load i32, ptr [[ARRAYIDX_108]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_108:%.*]] = add nsw i32 [[VAL_108]], [[SUM_NEXT_107]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_108:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 109
+; CHECK-NEXT:    [[ARRAYIDX_109:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_108]]
+; CHECK-NEXT:    [[VAL_109:%.*]] = load i32, ptr [[ARRAYIDX_109]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_109:%.*]] = add nsw i32 [[VAL_109]], [[SUM_NEXT_108]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_109:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 110
+; CHECK-NEXT:    [[ARRAYIDX_110:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_109]]
+; CHECK-NEXT:    [[VAL_110:%.*]] = load i32, ptr [[ARRAYIDX_110]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_110:%.*]] = add nsw i32 [[VAL_110]], [[SUM_NEXT_109]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_110:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 111
+; CHECK-NEXT:    [[ARRAYIDX_111:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_110]]
+; CHECK-NEXT:    [[VAL_111:%.*]] = load i32, ptr [[ARRAYIDX_111]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_111:%.*]] = add nsw i32 [[VAL_111]], [[SUM_NEXT_110]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_111:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 112
+; CHECK-NEXT:    [[ARRAYIDX_112:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_111]]
+; CHECK-NEXT:    [[VAL_112:%.*]] = load i32, ptr [[ARRAYIDX_112]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_112:%.*]] = add nsw i32 [[VAL_112]], [[SUM_NEXT_111]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_112:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 113
+; CHECK-NEXT:    [[ARRAYIDX_113:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_112]]
+; CHECK-NEXT:    [[VAL_113:%.*]] = load i32, ptr [[ARRAYIDX_113]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_113:%.*]] = add nsw i32 [[VAL_113]], [[SUM_NEXT_112]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_113:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 114
+; CHECK-NEXT:    [[ARRAYIDX_114:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_113]]
+; CHECK-NEXT:    [[VAL_114:%.*]] = load i32, ptr [[ARRAYIDX_114]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_114:%.*]] = add nsw i32 [[VAL_114]], [[SUM_NEXT_113]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_114:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 115
+; CHECK-NEXT:    [[ARRAYIDX_115:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_114]]
+; CHECK-NEXT:    [[VAL_115:%.*]] = load i32, ptr [[ARRAYIDX_115]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_115:%.*]] = add nsw i32 [[VAL_115]], [[SUM_NEXT_114]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_115:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 116
+; CHECK-NEXT:    [[ARRAYIDX_116:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_115]]
+; CHECK-NEXT:    [[VAL_116:%.*]] = load i32, ptr [[ARRAYIDX_116]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_116:%.*]] = add nsw i32 [[VAL_116]], [[SUM_NEXT_115]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_116:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 117
+; CHECK-NEXT:    [[ARRAYIDX_117:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_116]]
+; CHECK-NEXT:    [[VAL_117:%.*]] = load i32, ptr [[ARRAYIDX_117]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_117:%.*]] = add nsw i32 [[VAL_117]], [[SUM_NEXT_116]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_117:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 118
+; CHECK-NEXT:    [[ARRAYIDX_118:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_117]]
+; CHECK-NEXT:    [[VAL_118:%.*]] = load i32, ptr [[ARRAYIDX_118]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_118:%.*]] = add nsw i32 [[VAL_118]], [[SUM_NEXT_117]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_118:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 119
+; CHECK-NEXT:    [[ARRAYIDX_119:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_118]]
+; CHECK-NEXT:    [[VAL_119:%.*]] = load i32, ptr [[ARRAYIDX_119]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_119:%.*]] = add nsw i32 [[VAL_119]], [[SUM_NEXT_118]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_119:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 120
+; CHECK-NEXT:    [[ARRAYIDX_120:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_119]]
+; CHECK-NEXT:    [[VAL_120:%.*]] = load i32, ptr [[ARRAYIDX_120]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_120:%.*]] = add nsw i32 [[VAL_120]], [[SUM_NEXT_119]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_120:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 121
+; CHECK-NEXT:    [[ARRAYIDX_121:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_120]]
+; CHECK-NEXT:    [[VAL_121:%.*]] = load i32, ptr [[ARRAYIDX_121]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_121:%.*]] = add nsw i32 [[VAL_121]], [[SUM_NEXT_120]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_121:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 122
+; CHECK-NEXT:    [[ARRAYIDX_122:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_121]]
+; CHECK-NEXT:    [[VAL_122:%.*]] = load i32, ptr [[ARRAYIDX_122]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_122:%.*]] = add nsw i32 [[VAL_122]], [[SUM_NEXT_121]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_122:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 123
+; CHECK-NEXT:    [[ARRAYIDX_123:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_122]]
+; CHECK-NEXT:    [[VAL_123:%.*]] = load i32, ptr [[ARRAYIDX_123]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_123:%.*]] = add nsw i32 [[VAL_123]], [[SUM_NEXT_122]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_123:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 124
+; CHECK-NEXT:    [[ARRAYIDX_124:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_123]]
+; CHECK-NEXT:    [[VAL_124:%.*]] = load i32, ptr [[ARRAYIDX_124]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_124:%.*]] = add nsw i32 [[VAL_124]], [[SUM_NEXT_123]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_124:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 125
+; CHECK-NEXT:    [[ARRAYIDX_125:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_124]]
+; CHECK-NEXT:    [[VAL_125:%.*]] = load i32, ptr [[ARRAYIDX_125]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_125:%.*]] = add nsw i32 [[VAL_125]], [[SUM_NEXT_124]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_125:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 126
+; CHECK-NEXT:    [[ARRAYIDX_126:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_125]]
+; CHECK-NEXT:    [[VAL_126:%.*]] = load i32, ptr [[ARRAYIDX_126]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_126:%.*]] = add nsw i32 [[VAL_126]], [[SUM_NEXT_125]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_126:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 127
+; CHECK-NEXT:    [[ARRAYIDX_127:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_126]]
+; CHECK-NEXT:    [[VAL_127:%.*]] = load i32, ptr [[ARRAYIDX_127]], align 4
+; CHECK-NEXT:    [[SUM_NEXT_127]] = add nsw i32 [[VAL_127]], [[SUM_NEXT_126]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_127]] = add nuw nsw i64 [[INDVARS_IV]], 128
+; CHECK-NEXT:    [[EXITCOND_NOT_127:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_127]], 8192
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT_127]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_31]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_127]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    ret i32 [[SUM_NEXT_LCSSA]]
 ;
 entry:
@@ -167,16 +551,16 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-SAME: ptr [[ARY:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 7
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_COND_CLEANUP_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[DUMMY1:%.*]] = mul i32 [[VAL]], [[VAL]]
@@ -283,15 +667,339 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48_1:%.*]] = mul i32 [[DUMMY47_1]], [[DUMMY47_1]]
 ; CHECK-NEXT:    [[DUMMY49_1:%.*]] = mul i32 [[DUMMY48_1]], [[DUMMY48_1]]
 ; CHECK-NEXT:    [[DUMMY50_1:%.*]] = mul i32 [[DUMMY49_1]], [[DUMMY49_1]]
-; CHECK-NEXT:    [[SUM_NEXT_1]] = add nsw i32 [[DUMMY50_1]], [[SUM_NEXT]]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
-; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_COND_CLEANUP_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    [[SUM_NEXT_1:%.*]] = add nsw i32 [[DUMMY50_1]], [[SUM_NEXT]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[VAL_2:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[DUMMY1_2:%.*]] = mul i32 [[VAL_2]], [[VAL_2]]
+; CHECK-NEXT:    [[DUMMY2_2:%.*]] = mul i32 [[DUMMY1_2]], [[DUMMY1_2]]
+; CHECK-NEXT:    [[DUMMY3_2:%.*]] = mul i32 [[DUMMY2_2]], [[DUMMY2_2]]
+; CHECK-NEXT:    [[DUMMY4_2:%.*]] = mul i32 [[DUMMY3_2]], [[DUMMY3_2]]
+; CHECK-NEXT:    [[DUMMY5_2:%.*]] = mul i32 [[DUMMY4_2]], [[DUMMY4_2]]
+; CHECK-NEXT:    [[DUMMY6_2:%.*]] = mul i32 [[DUMMY5_2]], [[DUMMY5_2]]
+; CHECK-NEXT:    [[DUMMY7_2:%.*]] = mul i32 [[DUMMY6_2]], [[DUMMY6_2]]
+; CHECK-NEXT:    [[DUMMY8_2:%.*]] = mul i32 [[DUMMY7_2]], [[DUMMY7_2]]
+; CHECK-NEXT:    [[DUMMY9_2:%.*]] = mul i32 [[DUMMY8_2]], [[DUMMY8_2]]
+; CHECK-NEXT:    [[DUMMY10_2:%.*]] = mul i32 [[DUMMY9_2]], [[DUMMY9_2]]
+; CHECK-NEXT:    [[DUMMY11_2:%.*]] = mul i32 [[DUMMY10_2]], [[DUMMY10_2]]
+; CHECK-NEXT:    [[DUMMY12_2:%.*]] = mul i32 [[DUMMY11_2]], [[DUMMY11_2]]
+; CHECK-NEXT:    [[DUMMY13_2:%.*]] = mul i32 [[DUMMY12_2]], [[DUMMY12_2]]
+; CHECK-NEXT:    [[DUMMY14_2:%.*]] = mul i32 [[DUMMY13_2]], [[DUMMY13_2]]
+; CHECK-NEXT:    [[DUMMY15_2:%.*]] = mul i32 [[DUMMY14_2]], [[DUMMY14_2]]
+; CHECK-NEXT:    [[DUMMY16_2:%.*]] = mul i32 [[DUMMY15_2]], [[DUMMY15_2]]
+; CHECK-NEXT:    [[DUMMY17_2:%.*]] = mul i32 [[DUMMY16_2]], [[DUMMY16_2]]
+; CHECK-NEXT:    [[DUMMY18_2:%.*]] = mul i32 [[DUMMY17_2]], [[DUMMY17_2]]
+; CHECK-NEXT:    [[DUMMY19_2:%.*]] = mul i32 [[DUMMY18_2]], [[DUMMY18_2]]
+; CHECK-NEXT:    [[DUMMY20_2:%.*]] = mul i32 [[DUMMY19_2]], [[DUMMY19_2]]
+; CHECK-NEXT:    [[DUMMY21_2:%.*]] = mul i32 [[DUMMY20_2]], [[DUMMY20_2]]
+; CHECK-NEXT:    [[DUMMY22_2:%.*]] = mul i32 [[DUMMY21_2]], [[DUMMY21_2]]
+; CHECK-NEXT:    [[DUMMY23_2:%.*]] = mul i32 [[DUMMY22_2]], [[DUMMY22_2]]
+; CHECK-NEXT:    [[DUMMY24_2:%.*]] = mul i32 [[DUMMY23_2]], [[DUMMY23_2]]
+; CHECK-NEXT:    [[DUMMY25_2:%.*]] = mul i32 [[DUMMY24_2]], [[DUMMY24_2]]
+; CHECK-NEXT:    [[DUMMY26_2:%.*]] = mul i32 [[DUMMY25_2]], [[DUMMY25_2]]
+; CHECK-NEXT:    [[DUMMY27_2:%.*]] = mul i32 [[DUMMY26_2]], [[DUMMY26_2]]
+; CHECK-NEXT:    [[DUMMY28_2:%.*]] = mul i32 [[DUMMY27_2]], [[DUMMY27_2]]
+; CHECK-NEXT:    [[DUMMY29_2:%.*]] = mul i32 [[DUMMY28_2]], [[DUMMY28_2]]
+; CHECK-NEXT:    [[DUMMY30_2:%.*]] = mul i32 [[DUMMY29_2]], [[DUMMY29_2]]
+; CHECK-NEXT:    [[DUMMY31_2:%.*]] = mul i32 [[DUMMY30_2]], [[DUMMY30_2]]
+; CHECK-NEXT:    [[DUMMY32_2:%.*]] = mul i32 [[DUMMY31_2]], [[DUMMY31_2]]
+; CHECK-NEXT:    [[DUMMY33_2:%.*]] = mul i32 [[DUMMY32_2]], [[DUMMY32_2]]
+; CHECK-NEXT:    [[DUMMY34_2:%.*]] = mul i32 [[DUMMY33_2]], [[DUMMY33_2]]
+; CHECK-NEXT:    [[DUMMY35_2:%.*]] = mul i32 [[DUMMY34_2]], [[DUMMY34_2]]
+; CHECK-NEXT:    [[DUMMY36_2:%.*]] = mul i32 [[DUMMY35_2]], [[DUMMY35_2]]
+; CHECK-NEXT:    [[DUMMY37_2:%.*]] = mul i32 [[DUMMY36_2]], [[DUMMY36_2]]
+; CHECK-NEXT:    [[DUMMY38_2:%.*]] = mul i32 [[DUMMY37_2]], [[DUMMY37_2]]
+; CHECK-NEXT:    [[DUMMY39_2:%.*]] = mul i32 [[DUMMY38_2]], [[DUMMY38_2]]
+; CHECK-NEXT:    [[DUMMY40_2:%.*]] = mul i32 [[DUMMY39_2]], [[DUMMY39_2]]
+; CHECK-NEXT:    [[DUMMY41_2:%.*]] = mul i32 [[DUMMY40_2]], [[DUMMY40_2]]
+; CHECK-NEXT:    [[DUMMY42_2:%.*]] = mul i32 [[DUMMY41_2]], [[DUMMY41_2]]
+; CHECK-NEXT:    [[DUMMY43_2:%.*]] = mul i32 [[DUMMY42_2]], [[DUMMY42_2]]
+; CHECK-NEXT:    [[DUMMY44_2:%.*]] = mul i32 [[DUMMY43_2]], [[DUMMY43_2]]
+; CHECK-NEXT:    [[DUMMY45_2:%.*]] = mul i32 [[DUMMY44_2]], [[DUMMY44_2]]
+; CHECK-NEXT:    [[DUMMY46_2:%.*]] = mul i32 [[DUMMY45_2]], [[DUMMY45_2]]
+; CHECK-NEXT:    [[DUMMY47_2:%.*]] = mul i32 [[DUMMY46_2]], [[DUMMY46_2]]
+; CHECK-NEXT:    [[DUMMY48_2:%.*]] = mul i32 [[DUMMY47_2]], [[DUMMY47_2]]
+; CHECK-NEXT:    [[DUMMY49_2:%.*]] = mul i32 [[DUMMY48_2]], [[DUMMY48_2]]
+; CHECK-NEXT:    [[DUMMY50_2:%.*]] = mul i32 [[DUMMY49_2]], [[DUMMY49_2]]
+; CHECK-NEXT:    [[SUM_NEXT_2:%.*]] = add nsw i32 [[DUMMY50_2]], [[SUM_NEXT_1]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[VAL_3:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[DUMMY1_3:%.*]] = mul i32 [[VAL_3]], [[VAL_3]]
+; CHECK-NEXT:    [[DUMMY2_3:%.*]] = mul i32 [[DUMMY1_3]], [[DUMMY1_3]]
+; CHECK-NEXT:    [[DUMMY3_3:%.*]] = mul i32 [[DUMMY2_3]], [[DUMMY2_3]]
+; CHECK-NEXT:    [[DUMMY4_3:%.*]] = mul i32 [[DUMMY3_3]], [[DUMMY3_3]]
+; CHECK-NEXT:    [[DUMMY5_3:%.*]] = mul i32 [[DUMMY4_3]], [[DUMMY4_3]]
+; CHECK-NEXT:    [[DUMMY6_3:%.*]] = mul i32 [[DUMMY5_3]], [[DUMMY5_3]]
+; CHECK-NEXT:    [[DUMMY7_3:%.*]] = mul i32 [[DUMMY6_3]], [[DUMMY6_3]]
+; CHECK-NEXT:    [[DUMMY8_3:%.*]] = mul i32 [[DUMMY7_3]], [[DUMMY7_3]]
+; CHECK-NEXT:    [[DUMMY9_3:%.*]] = mul i32 [[DUMMY8_3]], [[DUMMY8_3]]
+; CHECK-NEXT:    [[DUMMY10_3:%.*]] = mul i32 [[DUMMY9_3]], [[DUMMY9_3]]
+; CHECK-NEXT:    [[DUMMY11_3:%.*]] = mul i32 [[DUMMY10_3]], [[DUMMY10_3]]
+; CHECK-NEXT:    [[DUMMY12_3:%.*]] = mul i32 [[DUMMY11_3]], [[DUMMY11_3]]
+; CHECK-NEXT:    [[DUMMY13_3:%.*]] = mul i32 [[DUMMY12_3]], [[DUMMY12_3]]
+; CHECK-NEXT:    [[DUMMY14_3:%.*]] = mul i32 [[DUMMY13_3]], [[DUMMY13_3]]
+; CHECK-NEXT:    [[DUMMY15_3:%.*]] = mul i32 [[DUMMY14_3]], [[DUMMY14_3]]
+; CHECK-NEXT:    [[DUMMY16_3:%.*]] = mul i32 [[DUMMY15_3]], [[DUMMY15_3]]
+; CHECK-NEXT:    [[DUMMY17_3:%.*]] = mul i32 [[DUMMY16_3]], [[DUMMY16_3]]
+; CHECK-NEXT:    [[DUMMY18_3:%.*]] = mul i32 [[DUMMY17_3]], [[DUMMY17_3]]
+; CHECK-NEXT:    [[DUMMY19_3:%.*]] = mul i32 [[DUMMY18_3]], [[DUMMY18_3]]
+; CHECK-NEXT:    [[DUMMY20_3:%.*]] = mul i32 [[DUMMY19_3]], [[DUMMY19_3]]
+; CHECK-NEXT:    [[DUMMY21_3:%.*]] = mul i32 [[DUMMY20_3]], [[DUMMY20_3]]
+; CHECK-NEXT:    [[DUMMY22_3:%.*]] = mul i32 [[DUMMY21_3]], [[DUMMY21_3]]
+; CHECK-NEXT:    [[DUMMY23_3:%.*]] = mul i32 [[DUMMY22_3]], [[DUMMY22_3]]
+; CHECK-NEXT:    [[DUMMY24_3:%.*]] = mul i32 [[DUMMY23_3]], [[DUMMY23_3]]
+; CHECK-NEXT:    [[DUMMY25_3:%.*]] = mul i32 [[DUMMY24_3]], [[DUMMY24_3]]
+; CHECK-NEXT:    [[DUMMY26_3:%.*]] = mul i32 [[DUMMY25_3]], [[DUMMY25_3]]
+; CHECK-NEXT:    [[DUMMY27_3:%.*]] = mul i32 [[DUMMY26_3]], [[DUMMY26_3]]
+; CHECK-NEXT:    [[DUMMY28_3:%.*]] = mul i32 [[DUMMY27_3]], [[DUMMY27_3]]
+; CHECK-NEXT:    [[DUMMY29_3:%.*]] = mul i32 [[DUMMY28_3]], [[DUMMY28_3]]
+; CHECK-NEXT:    [[DUMMY30_3:%.*]] = mul i32 [[DUMMY29_3]], [[DUMMY29_3]]
+; CHECK-NEXT:    [[DUMMY31_3:%.*]] = mul i32 [[DUMMY30_3]], [[DUMMY30_3]]
+; CHECK-NEXT:    [[DUMMY32_3:%.*]] = mul i32 [[DUMMY31_3]], [[DUMMY31_3]]
+; CHECK-NEXT:    [[DUMMY33_3:%.*]] = mul i32 [[DUMMY32_3]], [[DUMMY32_3]]
+; CHECK-NEXT:    [[DUMMY34_3:%.*]] = mul i32 [[DUMMY33_3]], [[DUMMY33_3]]
+; CHECK-NEXT:    [[DUMMY35_3:%.*]] = mul i32 [[DUMMY34_3]], [[DUMMY34_3]]
+; CHECK-NEXT:    [[DUMMY36_3:%.*]] = mul i32 [[DUMMY35_3]], [[DUMMY35_3]]
+; CHECK-NEXT:    [[DUMMY37_3:%.*]] = mul i32 [[DUMMY36_3]], [[DUMMY36_3]]
+; CHECK-NEXT:    [[DUMMY38_3:%.*]] = mul i32 [[DUMMY37_3]], [[DUMMY37_3]]
+; CHECK-NEXT:    [[DUMMY39_3:%.*]] = mul i32 [[DUMMY38_3]], [[DUMMY38_3]]
+; CHECK-NEXT:    [[DUMMY40_3:%.*]] = mul i32 [[DUMMY39_3]], [[DUMMY39_3]]
+; CHECK-NEXT:    [[DUMMY41_3:%.*]] = mul i32 [[DUMMY40_3]], [[DUMMY40_3]]
+; CHECK-NEXT:    [[DUMMY42_3:%.*]] = mul i32 [[DUMMY41_3]], [[DUMMY41_3]]
+; CHECK-NEXT:    [[DUMMY43_3:%.*]] = mul i32 [[DUMMY42_3]], [[DUMMY42_3]]
+; CHECK-NEXT:    [[DUMMY44_3:%.*]] = mul i32 [[DUMMY43_3]], [[DUMMY43_3]]
+; CHECK-NEXT:    [[DUMMY45_3:%.*]] = mul i32 [[DUMMY44_3]], [[DUMMY44_3]]
+; CHECK-NEXT:    [[DUMMY46_3:%.*]] = mul i32 [[DUMMY45_3]], [[DUMMY45_3]]
+; CHECK-NEXT:    [[DUMMY47_3:%.*]] = mul i32 [[DUMMY46_3]], [[DUMMY46_3]]
+; CHECK-NEXT:    [[DUMMY48_3:%.*]] = mul i32 [[DUMMY47_3]], [[DUMMY47_3]]
+; CHECK-NEXT:    [[DUMMY49_3:%.*]] = mul i32 [[DUMMY48_3]], [[DUMMY48_3]]
+; CHECK-NEXT:    [[DUMMY50_3:%.*]] = mul i32 [[DUMMY49_3]], [[DUMMY49_3]]
+; CHECK-NEXT:    [[SUM_NEXT_3:%.*]] = add nsw i32 [[DUMMY50_3]], [[SUM_NEXT_2]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT:    [[VAL_4:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[DUMMY1_4:%.*]] = mul i32 [[VAL_4]], [[VAL_4]]
+; CHECK-NEXT:    [[DUMMY2_4:%.*]] = mul i32 [[DUMMY1_4]], [[DUMMY1_4]]
+; CHECK-NEXT:    [[DUMMY3_4:%.*]] = mul i32 [[DUMMY2_4]], [[DUMMY2_4]]
+; CHECK-NEXT:    [[DUMMY4_4:%.*]] = mul i32 [[DUMMY3_4]], [[DUMMY3_4]]
+; CHECK-NEXT:    [[DUMMY5_4:%.*]] = mul i32 [[DUMMY4_4]], [[DUMMY4_4]]
+; CHECK-NEXT:    [[DUMMY6_4:%.*]] = mul i32 [[DUMMY5_4]], [[DUMMY5_4]]
+; CHECK-NEXT:    [[DUMMY7_4:%.*]] = mul i32 [[DUMMY6_4]], [[DUMMY6_4]]
+; CHECK-NEXT:    [[DUMMY8_4:%.*]] = mul i32 [[DUMMY7_4]], [[DUMMY7_4]]
+; CHECK-NEXT:    [[DUMMY9_4:%.*]] = mul i32 [[DUMMY8_4]], [[DUMMY8_4]]
+; CHECK-NEXT:    [[DUMMY10_4:%.*]] = mul i32 [[DUMMY9_4]], [[DUMMY9_4]]
+; CHECK-NEXT:    [[DUMMY11_4:%.*]] = mul i32 [[DUMMY10_4]], [[DUMMY10_4]]
+; CHECK-NEXT:    [[DUMMY12_4:%.*]] = mul i32 [[DUMMY11_4]], [[DUMMY11_4]]
+; CHECK-NEXT:    [[DUMMY13_4:%.*]] = mul i32 [[DUMMY12_4]], [[DUMMY12_4]]
+; CHECK-NEXT:    [[DUMMY14_4:%.*]] = mul i32 [[DUMMY13_4]], [[DUMMY13_4]]
+; CHECK-NEXT:    [[DUMMY15_4:%.*]] = mul i32 [[DUMMY14_4]], [[DUMMY14_4]]
+; CHECK-NEXT:    [[DUMMY16_4:%.*]] = mul i32 [[DUMMY15_4]], [[DUMMY15_4]]
+; CHECK-NEXT:    [[DUMMY17_4:%.*]] = mul i32 [[DUMMY16_4]], [[DUMMY16_4]]
+; CHECK-NEXT:    [[DUMMY18_4:%.*]] = mul i32 [[DUMMY17_4]], [[DUMMY17_4]]
+; CHECK-NEXT:    [[DUMMY19_4:%.*]] = mul i32 [[DUMMY18_4]], [[DUMMY18_4]]
+; CHECK-NEXT:    [[DUMMY20_4:%.*]] = mul i32 [[DUMMY19_4]], [[DUMMY19_4]]
+; CHECK-NEXT:    [[DUMMY21_4:%.*]] = mul i32 [[DUMMY20_4]], [[DUMMY20_4]]
+; CHECK-NEXT:    [[DUMMY22_4:%.*]] = mul i32 [[DUMMY21_4]], [[DUMMY21_4]]
+; CHECK-NEXT:    [[DUMMY23_4:%.*]] = mul i32 [[DUMMY22_4]], [[DUMMY22_4]]
+; CHECK-NEXT:    [[DUMMY24_4:%.*]] = mul i32 [[DUMMY23_4]], [[DUMMY23_4]]
+; CHECK-NEXT:    [[DUMMY25_4:%.*]] = mul i32 [[DUMMY24_4]], [[DUMMY24_4]]
+; CHECK-NEXT:    [[DUMMY26_4:%.*]] = mul i32 [[DUMMY25_4]], [[DUMMY25_4]]
+; CHECK-NEXT:    [[DUMMY27_4:%.*]] = mul i32 [[DUMMY26_4]], [[DUMMY26_4]]
+; CHECK-NEXT:    [[DUMMY28_4:%.*]] = mul i32 [[DUMMY27_4]], [[DUMMY27_4]]
+; CHECK-NEXT:    [[DUMMY29_4:%.*]] = mul i32 [[DUMMY28_4]], [[DUMMY28_4]]
+; CHECK-NEXT:    [[DUMMY30_4:%.*]] = mul i32 [[DUMMY29_4]], [[DUMMY29_4]]
+; CHECK-NEXT:    [[DUMMY31_4:%.*]] = mul i32 [[DUMMY30_4]], [[DUMMY30_4]]
+; CHECK-NEXT:    [[DUMMY32_4:%.*]] = mul i32 [[DUMMY31_4]], [[DUMMY31_4]]
+; CHECK-NEXT:    [[DUMMY33_4:%.*]] = mul i32 [[DUMMY32_4]], [[DUMMY32_4]]
+; CHECK-NEXT:    [[DUMMY34_4:%.*]] = mul i32 [[DUMMY33_4]], [[DUMMY33_4]]
+; CHECK-NEXT:    [[DUMMY35_4:%.*]] = mul i32 [[DUMMY34_4]], [[DUMMY34_4]]
+; CHECK-NEXT:    [[DUMMY36_4:%.*]] = mul i32 [[DUMMY35_4]], [[DUMMY35_4]]
+; CHECK-NEXT:    [[DUMMY37_4:%.*]] = mul i32 [[DUMMY36_4]], [[DUMMY36_4]]
+; CHECK-NEXT:    [[DUMMY38_4:%.*]] = mul i32 [[DUMMY37_4]], [[DUMMY37_4]]
+; CHECK-NEXT:    [[DUMMY39_4:%.*]] = mul i32 [[DUMMY38_4]], [[DUMMY38_4]]
+; CHECK-NEXT:    [[DUMMY40_4:%.*]] = mul i32 [[DUMMY39_4]], [[DUMMY39_4]]
+; CHECK-NEXT:    [[DUMMY41_4:%.*]] = mul i32 [[DUMMY40_4]], [[DUMMY40_4]]
+; CHECK-NEXT:    [[DUMMY42_4:%.*]] = mul i32 [[DUMMY41_4]], [[DUMMY41_4]]
+; CHECK-NEXT:    [[DUMMY43_4:%.*]] = mul i32 [[DUMMY42_4]], [[DUMMY42_4]]
+; CHECK-NEXT:    [[DUMMY44_4:%.*]] = mul i32 [[DUMMY43_4]], [[DUMMY43_4]]
+; CHECK-NEXT:    [[DUMMY45_4:%.*]] = mul i32 [[DUMMY44_4]], [[DUMMY44_4]]
+; CHECK-NEXT:    [[DUMMY46_4:%.*]] = mul i32 [[DUMMY45_4]], [[DUMMY45_4]]
+; CHECK-NEXT:    [[DUMMY47_4:%.*]] = mul i32 [[DUMMY46_4]], [[DUMMY46_4]]
+; CHECK-NEXT:    [[DUMMY48_4:%.*]] = mul i32 [[DUMMY47_4]], [[DUMMY47_4]]
+; CHECK-NEXT:    [[DUMMY49_4:%.*]] = mul i32 [[DUMMY48_4]], [[DUMMY48_4]]
+; CHECK-NEXT:    [[DUMMY50_4:%.*]] = mul i32 [[DUMMY49_4]], [[DUMMY49_4]]
+; CHECK-NEXT:    [[SUM_NEXT_4:%.*]] = add nsw i32 [[DUMMY50_4]], [[SUM_NEXT_3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT:    [[VAL_5:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[DUMMY1_5:%.*]] = mul i32 [[VAL_5]], [[VAL_5]]
+; CHECK-NEXT:    [[DUMMY2_5:%.*]] = mul i32 [[DUMMY1_5]], [[DUMMY1_5]]
+; CHECK-NEXT:    [[DUMMY3_5:%.*]] = mul i32 [[DUMMY2_5]], [[DUMMY2_5]]
+; CHECK-NEXT:    [[DUMMY4_5:%.*]] = mul i32 [[DUMMY3_5]], [[DUMMY3_5]]
+; CHECK-NEXT:    [[DUMMY5_5:%.*]] = mul i32 [[DUMMY4_5]], [[DUMMY4_5]]
+; CHECK-NEXT:    [[DUMMY6_5:%.*]] = mul i32 [[DUMMY5_5]], [[DUMMY5_5]]
+; CHECK-NEXT:    [[DUMMY7_5:%.*]] = mul i32 [[DUMMY6_5]], [[DUMMY6_5]]
+; CHECK-NEXT:    [[DUMMY8_5:%.*]] = mul i32 [[DUMMY7_5]], [[DUMMY7_5]]
+; CHECK-NEXT:    [[DUMMY9_5:%.*]] = mul i32 [[DUMMY8_5]], [[DUMMY8_5]]
+; CHECK-NEXT:    [[DUMMY10_5:%.*]] = mul i32 [[DUMMY9_5]], [[DUMMY9_5]]
+; CHECK-NEXT:    [[DUMMY11_5:%.*]] = mul i32 [[DUMMY10_5]], [[DUMMY10_5]]
+; CHECK-NEXT:    [[DUMMY12_5:%.*]] = mul i32 [[DUMMY11_5]], [[DUMMY11_5]]
+; CHECK-NEXT:    [[DUMMY13_5:%.*]] = mul i32 [[DUMMY12_5]], [[DUMMY12_5]]
+; CHECK-NEXT:    [[DUMMY14_5:%.*]] = mul i32 [[DUMMY13_5]], [[DUMMY13_5]]
+; CHECK-NEXT:    [[DUMMY15_5:%.*]] = mul i32 [[DUMMY14_5]], [[DUMMY14_5]]
+; CHECK-NEXT:    [[DUMMY16_5:%.*]] = mul i32 [[DUMMY15_5]], [[DUMMY15_5]]
+; CHECK-NEXT:    [[DUMMY17_5:%.*]] = mul i32 [[DUMMY16_5]], [[DUMMY16_5]]
+; CHECK-NEXT:    [[DUMMY18_5:%.*]] = mul i32 [[DUMMY17_5]], [[DUMMY17_5]]
+; CHECK-NEXT:    [[DUMMY19_5:%.*]] = mul i32 [[DUMMY18_5]], [[DUMMY18_5]]
+; CHECK-NEXT:    [[DUMMY20_5:%.*]] = mul i32 [[DUMMY19_5]], [[DUMMY19_5]]
+; CHECK-NEXT:    [[DUMMY21_5:%.*]] = mul i32 [[DUMMY20_5]], [[DUMMY20_5]]
+; CHECK-NEXT:    [[DUMMY22_5:%.*]] = mul i32 [[DUMMY21_5]], [[DUMMY21_5]]
+; CHECK-NEXT:    [[DUMMY23_5:%.*]] = mul i32 [[DUMMY22_5]], [[DUMMY22_5]]
+; CHECK-NEXT:    [[DUMMY24_5:%.*]] = mul i32 [[DUMMY23_5]], [[DUMMY23_5]]
+; CHECK-NEXT:    [[DUMMY25_5:%.*]] = mul i32 [[DUMMY24_5]], [[DUMMY24_5]]
+; CHECK-NEXT:    [[DUMMY26_5:%.*]] = mul i32 [[DUMMY25_5]], [[DUMMY25_5]]
+; CHECK-NEXT:    [[DUMMY27_5:%.*]] = mul i32 [[DUMMY26_5]], [[DUMMY26_5]]
+; CHECK-NEXT:    [[DUMMY28_5:%.*]] = mul i32 [[DUMMY27_5]], [[DUMMY27_5]]
+; CHECK-NEXT:    [[DUMMY29_5:%.*]] = mul i32 [[DUMMY28_5]], [[DUMMY28_5]]
+; CHECK-NEXT:    [[DUMMY30_5:%.*]] = mul i32 [[DUMMY29_5]], [[DUMMY29_5]]
+; CHECK-NEXT:    [[DUMMY31_5:%.*]] = mul i32 [[DUMMY30_5]], [[DUMMY30_5]]
+; CHECK-NEXT:    [[DUMMY32_5:%.*]] = mul i32 [[DUMMY31_5]], [[DUMMY31_5]]
+; CHECK-NEXT:    [[DUMMY33_5:%.*]] = mul i32 [[DUMMY32_5]], [[DUMMY32_5]]
+; CHECK-NEXT:    [[DUMMY34_5:%.*]] = mul i32 [[DUMMY33_5]], [[DUMMY33_5]]
+; CHECK-NEXT:    [[DUMMY35_5:%.*]] = mul i32 [[DUMMY34_5]], [[DUMMY34_5]]
+; CHECK-NEXT:    [[DUMMY36_5:%.*]] = mul i32 [[DUMMY35_5]], [[DUMMY35_5]]
+; CHECK-NEXT:    [[DUMMY37_5:%.*]] = mul i32 [[DUMMY36_5]], [[DUMMY36_5]]
+; CHECK-NEXT:    [[DUMMY38_5:%.*]] = mul i32 [[DUMMY37_5]], [[DUMMY37_5]]
+; CHECK-NEXT:    [[DUMMY39_5:%.*]] = mul i32 [[DUMMY38_5]], [[DUMMY38_5]]
+; CHECK-NEXT:    [[DUMMY40_5:%.*]] = mul i32 [[DUMMY39_5]], [[DUMMY39_5]]
+; CHECK-NEXT:    [[DUMMY41_5:%.*]] = mul i32 [[DUMMY40_5]], [[DUMMY40_5]]
+; CHECK-NEXT:    [[DUMMY42_5:%.*]] = mul i32 [[DUMMY41_5]], [[DUMMY41_5]]
+; CHECK-NEXT:    [[DUMMY43_5:%.*]] = mul i32 [[DUMMY42_5]], [[DUMMY42_5]]
+; CHECK-NEXT:    [[DUMMY44_5:%.*]] = mul i32 [[DUMMY43_5]], [[DUMMY43_5]]
+; CHECK-NEXT:    [[DUMMY45_5:%.*]] = mul i32 [[DUMMY44_5]], [[DUMMY44_5]]
+; CHECK-NEXT:    [[DUMMY46_5:%.*]] = mul i32 [[DUMMY45_5]], [[DUMMY45_5]]
+; CHECK-NEXT:    [[DUMMY47_5:%.*]] = mul i32 [[DUMMY46_5]], [[DUMMY46_5]]
+; CHECK-NEXT:    [[DUMMY48_5:%.*]] = mul i32 [[DUMMY47_5]], [[DUMMY47_5]]
+; CHECK-NEXT:    [[DUMMY49_5:%.*]] = mul i32 [[DUMMY48_5]], [[DUMMY48_5]]
+; CHECK-NEXT:    [[DUMMY50_5:%.*]] = mul i32 [[DUMMY49_5]], [[DUMMY49_5]]
+; CHECK-NEXT:    [[SUM_NEXT_5:%.*]] = add nsw i32 [[DUMMY50_5]], [[SUM_NEXT_4]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT:    [[VAL_6:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[DUMMY1_6:%.*]] = mul i32 [[VAL_6]], [[VAL_6]]
+; CHECK-NEXT:    [[DUMMY2_6:%.*]] = mul i32 [[DUMMY1_6]], [[DUMMY1_6]]
+; CHECK-NEXT:    [[DUMMY3_6:%.*]] = mul i32 [[DUMMY2_6]], [[DUMMY2_6]]
+; CHECK-NEXT:    [[DUMMY4_6:%.*]] = mul i32 [[DUMMY3_6]], [[DUMMY3_6]]
+; CHECK-NEXT:    [[DUMMY5_6:%.*]] = mul i32 [[DUMMY4_6]], [[DUMMY4_6]]
+; CHECK-NEXT:    [[DUMMY6_6:%.*]] = mul i32 [[DUMMY5_6]], [[DUMMY5_6]]
+; CHECK-NEXT:    [[DUMMY7_6:%.*]] = mul i32 [[DUMMY6_6]], [[DUMMY6_6]]
+; CHECK-NEXT:    [[DUMMY8_6:%.*]] = mul i32 [[DUMMY7_6]], [[DUMMY7_6]]
+; CHECK-NEXT:    [[DUMMY9_6:%.*]] = mul i32 [[DUMMY8_6]], [[DUMMY8_6]]
+; CHECK-NEXT:    [[DUMMY10_6:%.*]] = mul i32 [[DUMMY9_6]], [[DUMMY9_6]]
+; CHECK-NEXT:    [[DUMMY11_6:%.*]] = mul i32 [[DUMMY10_6]], [[DUMMY10_6]]
+; CHECK-NEXT:    [[DUMMY12_6:%.*]] = mul i32 [[DUMMY11_6]], [[DUMMY11_6]]
+; CHECK-NEXT:    [[DUMMY13_6:%.*]] = mul i32 [[DUMMY12_6]], [[DUMMY12_6]]
+; CHECK-NEXT:    [[DUMMY14_6:%.*]] = mul i32 [[DUMMY13_6]], [[DUMMY13_6]]
+; CHECK-NEXT:    [[DUMMY15_6:%.*]] = mul i32 [[DUMMY14_6]], [[DUMMY14_6]]
+; CHECK-NEXT:    [[DUMMY16_6:%.*]] = mul i32 [[DUMMY15_6]], [[DUMMY15_6]]
+; CHECK-NEXT:    [[DUMMY17_6:%.*]] = mul i32 [[DUMMY16_6]], [[DUMMY16_6]]
+; CHECK-NEXT:    [[DUMMY18_6:%.*]] = mul i32 [[DUMMY17_6]], [[DUMMY17_6]]
+; CHECK-NEXT:    [[DUMMY19_6:%.*]] = mul i32 [[DUMMY18_6]], [[DUMMY18_6]]
+; CHECK-NEXT:    [[DUMMY20_6:%.*]] = mul i32 [[DUMMY19_6]], [[DUMMY19_6]]
+; CHECK-NEXT:    [[DUMMY21_6:%.*]] = mul i32 [[DUMMY20_6]], [[DUMMY20_6]]
+; CHECK-NEXT:    [[DUMMY22_6:%.*]] = mul i32 [[DUMMY21_6]], [[DUMMY21_6]]
+; CHECK-NEXT:    [[DUMMY23_6:%.*]] = mul i32 [[DUMMY22_6]], [[DUMMY22_6]]
+; CHECK-NEXT:    [[DUMMY24_6:%.*]] = mul i32 [[DUMMY23_6]], [[DUMMY23_6]]
+; CHECK-NEXT:    [[DUMMY25_6:%.*]] = mul i32 [[DUMMY24_6]], [[DUMMY24_6]]
+; CHECK-NEXT:    [[DUMMY26_6:%.*]] = mul i32 [[DUMMY25_6]], [[DUMMY25_6]]
+; CHECK-NEXT:    [[DUMMY27_6:%.*]] = mul i32 [[DUMMY26_6]], [[DUMMY26_6]]
+; CHECK-NEXT:    [[DUMMY28_6:%.*]] = mul i32 [[DUMMY27_6]], [[DUMMY27_6]]
+; CHECK-NEXT:    [[DUMMY29_6:%.*]] = mul i32 [[DUMMY28_6]], [[DUMMY28_6]]
+; CHECK-NEXT:    [[DUMMY30_6:%.*]] = mul i32 [[DUMMY29_6]], [[DUMMY29_6]]
+; CHECK-NEXT:    [[DUMMY31_6:%.*]] = mul i32 [[DUMMY30_6]], [[DUMMY30_6]]
+; CHECK-NEXT:    [[DUMMY32_6:%.*]] = mul i32 [[DUMMY31_6]], [[DUMMY31_6]]
+; CHECK-NEXT:    [[DUMMY33_6:%.*]] = mul i32 [[DUMMY32_6]], [[DUMMY32_6]]
+; CHECK-NEXT:    [[DUMMY34_6:%.*]] = mul i32 [[DUMMY33_6]], [[DUMMY33_6]]
+; CHECK-NEXT:    [[DUMMY35_6:%.*]] = mul i32 [[DUMMY34_6]], [[DUMMY34_6]]
+; CHECK-NEXT:    [[DUMMY36_6:%.*]] = mul i32 [[DUMMY35_6]], [[DUMMY35_6]]
+; CHECK-NEXT:    [[DUMMY37_6:%.*]] = mul i32 [[DUMMY36_6]], [[DUMMY36_6]]
+; CHECK-NEXT:    [[DUMMY38_6:%.*]] = mul i32 [[DUMMY37_6]], [[DUMMY37_6]]
+; CHECK-NEXT:    [[DUMMY39_6:%.*]] = mul i32 [[DUMMY38_6]], [[DUMMY38_6]]
+; CHECK-NEXT:    [[DUMMY40_6:%.*]] = mul i32 [[DUMMY39_6]], [[DUMMY39_6]]
+; CHECK-NEXT:    [[DUMMY41_6:%.*]] = mul i32 [[DUMMY40_6]], [[DUMMY40_6]]
+; CHECK-NEXT:    [[DUMMY42_6:%.*]] = mul i32 [[DUMMY41_6]], [[DUMMY41_6]]
+; CHECK-NEXT:    [[DUMMY43_6:%.*]] = mul i32 [[DUMMY42_6]], [[DUMMY42_6]]
+; CHECK-NEXT:    [[DUMMY44_6:%.*]] = mul i32 [[DUMMY43_6]], [[DUMMY43_6]]
+; CHECK-NEXT:    [[DUMMY45_6:%.*]] = mul i32 [[DUMMY44_6]], [[DUMMY44_6]]
+; CHECK-NEXT:    [[DUMMY46_6:%.*]] = mul i32 [[DUMMY45_6]], [[DUMMY45_6]]
+; CHECK-NEXT:    [[DUMMY47_6:%.*]] = mul i32 [[DUMMY46_6]], [[DUMMY46_6]]
+; CHECK-NEXT:    [[DUMMY48_6:%.*]] = mul i32 [[DUMMY47_6]], [[DUMMY47_6]]
+; CHECK-NEXT:    [[DUMMY49_6:%.*]] = mul i32 [[DUMMY48_6]], [[DUMMY48_6]]
+; CHECK-NEXT:    [[DUMMY50_6:%.*]] = mul i32 [[DUMMY49_6]], [[DUMMY49_6]]
+; CHECK-NEXT:    [[SUM_NEXT_6:%.*]] = add nsw i32 [[DUMMY50_6]], [[SUM_NEXT_5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT:    [[VAL_7:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[DUMMY1_7:%.*]] = mul i32 [[VAL_7]], [[VAL_7]]
+; CHECK-NEXT:    [[DUMMY2_7:%.*]] = mul i32 [[DUMMY1_7]], [[DUMMY1_7]]
+; CHECK-NEXT:    [[DUMMY3_7:%.*]] = mul i32 [[DUMMY2_7]], [[DUMMY2_7]]
+; CHECK-NEXT:    [[DUMMY4_7:%.*]] = mul i32 [[DUMMY3_7]], [[DUMMY3_7]]
+; CHECK-NEXT:    [[DUMMY5_7:%.*]] = mul i32 [[DUMMY4_7]], [[DUMMY4_7]]
+; CHECK-NEXT:    [[DUMMY6_7:%.*]] = mul i32 [[DUMMY5_7]], [[DUMMY5_7]]
+; CHECK-NEXT:    [[DUMMY7_7:%.*]] = mul i32 [[DUMMY6_7]], [[DUMMY6_7]]
+; CHECK-NEXT:    [[DUMMY8_7:%.*]] = mul i32 [[DUMMY7_7]], [[DUMMY7_7]]
+; CHECK-NEXT:    [[DUMMY9_7:%.*]] = mul i32 [[DUMMY8_7]], [[DUMMY8_7]]
+; CHECK-NEXT:    [[DUMMY10_7:%.*]] = mul i32 [[DUMMY9_7]], [[DUMMY9_7]]
+; CHECK-NEXT:    [[DUMMY11_7:%.*]] = mul i32 [[DUMMY10_7]], [[DUMMY10_7]]
+; CHECK-NEXT:    [[DUMMY12_7:%.*]] = mul i32 [[DUMMY11_7]], [[DUMMY11_7]]
+; CHECK-NEXT:    [[DUMMY13_7:%.*]] = mul i32 [[DUMMY12_7]], [[DUMMY12_7]]
+; CHECK-NEXT:    [[DUMMY14_7:%.*]] = mul i32 [[DUMMY13_7]], [[DUMMY13_7]]
+; CHECK-NEXT:    [[DUMMY15_7:%.*]] = mul i32 [[DUMMY14_7]], [[DUMMY14_7]]
+; CHECK-NEXT:    [[DUMMY16_7:%.*]] = mul i32 [[DUMMY15_7]], [[DUMMY15_7]]
+; CHECK-NEXT:    [[DUMMY17_7:%.*]] = mul i32 [[DUMMY16_7]], [[DUMMY16_7]]
+; CHECK-NEXT:    [[DUMMY18_7:%.*]] = mul i32 [[DUMMY17_7]], [[DUMMY17_7]]
+; CHECK-NEXT:    [[DUMMY19_7:%.*]] = mul i32 [[DUMMY18_7]], [[DUMMY18_7]]
+; CHECK-NEXT:    [[DUMMY20_7:%.*]] = mul i32 [[DUMMY19_7]], [[DUMMY19_7]]
+; CHECK-NEXT:    [[DUMMY21_7:%.*]] = mul i32 [[DUMMY20_7]], [[DUMMY20_7]]
+; CHECK-NEXT:    [[DUMMY22_7:%.*]] = mul i32 [[DUMMY21_7]], [[DUMMY21_7]]
+; CHECK-NEXT:    [[DUMMY23_7:%.*]] = mul i32 [[DUMMY22_7]], [[DUMMY22_7]]
+; CHECK-NEXT:    [[DUMMY24_7:%.*]] = mul i32 [[DUMMY23_7]], [[DUMMY23_7]]
+; CHECK-NEXT:    [[DUMMY25_7:%.*]] = mul i32 [[DUMMY24_7]], [[DUMMY24_7]]
+; CHECK-NEXT:    [[DUMMY26_7:%.*]] = mul i32 [[DUMMY25_7]], [[DUMMY25_7]]
+; CHECK-NEXT:    [[DUMMY27_7:%.*]] = mul i32 [[DUMMY26_7]], [[DUMMY26_7]]
+; CHECK-NEXT:    [[DUMMY28_7:%.*]] = mul i32 [[DUMMY27_7]], [[DUMMY27_7]]
+; CHECK-NEXT:    [[DUMMY29_7:%.*]] = mul i32 [[DUMMY28_7]], [[DUMMY28_7]]
+; CHECK-NEXT:    [[DUMMY30_7:%.*]] = mul i32 [[DUMMY29_7]], [[DUMMY29_7]]
+; CHECK-NEXT:    [[DUMMY31_7:%.*]] = mul i32 [[DUMMY30_7]], [[DUMMY30_7]]
+; CHECK-NEXT:    [[DUMMY32_7:%.*]] = mul i32 [[DUMMY31_7]], [[DUMMY31_7]]
+; CHECK-NEXT:    [[DUMMY33_7:%.*]] = mul i32 [[DUMMY32_7]], [[DUMMY32_7]]
+; CHECK-NEXT:    [[DUMMY34_7:%.*]] = mul i32 [[DUMMY33_7]], [[DUMMY33_7]]
+; CHECK-NEXT:    [[DUMMY35_7:%.*]] = mul i32 [[DUMMY34_7]], [[DUMMY34_7]]
+; CHECK-NEXT:    [[DUMMY36_7:%.*]] = mul i32 [[DUMMY35_7]], [[DUMMY35_7]]
+; CHECK-NEXT:    [[DUMMY37_7:%.*]] = mul i32 [[DUMMY36_7]], [[DUMMY36_7]]
+; CHECK-NEXT:    [[DUMMY38_7:%.*]] = mul i32 [[DUMMY37_7]], [[DUMMY37_7]]
+; CHECK-NEXT:    [[DUMMY39_7:%.*]] = mul i32 [[DUMMY38_7]], [[DUMMY38_7]]
+; CHECK-NEXT:    [[DUMMY40_7:%.*]] = mul i32 [[DUMMY39_7]], [[DUMMY39_7]]
+; CHECK-NEXT:    [[DUMMY41_7:%.*]] = mul i32 [[DUMMY40_7]], [[DUMMY40_7]]
+; CHECK-NEXT:    [[DUMMY42_7:%.*]] = mul i32 [[DUMMY41_7]], [[DUMMY41_7]]
+; CHECK-NEXT:    [[DUMMY43_7:%.*]] = mul i32 [[DUMMY42_7]], [[DUMMY42_7]]
+; CHECK-NEXT:    [[DUMMY44_7:%.*]] = mul i32 [[DUMMY43_7]], [[DUMMY43_7]]
+; CHECK-NEXT:    [[DUMMY45_7:%.*]] = mul i32 [[DUMMY44_7]], [[DUMMY44_7]]
+; CHECK-NEXT:    [[DUMMY46_7:%.*]] = mul i32 [[DUMMY45_7]], [[DUMMY45_7]]
+; CHECK-NEXT:    [[DUMMY47_7:%.*]] = mul i32 [[DUMMY46_7]], [[DUMMY46_7]]
+; CHECK-NEXT:    [[DUMMY48_7:%.*]] = mul i32 [[DUMMY47_7]], [[DUMMY47_7]]
+; CHECK-NEXT:    [[DUMMY49_7:%.*]] = mul i32 [[DUMMY48_7]], [[DUMMY48_7]]
+; CHECK-NEXT:    [[DUMMY50_7:%.*]] = mul i32 [[DUMMY49_7]], [[DUMMY49_7]]
+; CHECK-NEXT:    [[SUM_NEXT_7]] = add nsw i32 [[DUMMY50_7]], [[SUM_NEXT_6]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
+; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.cond.cleanup.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[SUM_NEXT_LCSSA_PH_PH:%.*]] = phi i32 [ [[SUM_NEXT_1]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[SUM_UNR_PH:%.*]] = phi i32 [ [[SUM_NEXT_1]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM_NEXT_LCSSA_PH_PH:%.*]] = phi i32 [ [[SUM_NEXT_7]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM_UNR_PH:%.*]] = phi i32 [ [[SUM_NEXT_7]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_UNR_LCSSA]]
 ; CHECK:       for.cond.cleanup.unr-lcssa:
 ; CHECK-NEXT:    [[SUM_NEXT_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_NEXT_LCSSA_PH_PH]], [[FOR_COND_CLEANUP_UNR_LCSSA_LOOPEXIT]] ]
@@ -302,7 +1010,10 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK:       for.body.epil.preheader:
 ; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
 ; CHECK:       for.body.epil:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_UNR]]
+; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ]
+; CHECK-NEXT:    [[SUM_EPIL:%.*]] = phi i32 [ [[SUM_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ], [ [[SUM_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ]
+; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, [[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ]
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[ARY]], i64 [[INDVARS_IV_EPIL]]
 ; CHECK-NEXT:    [[VAL_EPIL:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
 ; CHECK-NEXT:    [[DUMMY1_EPIL:%.*]] = mul i32 [[VAL_EPIL]], [[VAL_EPIL]]
 ; CHECK-NEXT:    [[DUMMY2_EPIL:%.*]] = mul i32 [[DUMMY1_EPIL]], [[DUMMY1_EPIL]]
@@ -354,10 +1065,17 @@ define i32 @test2(ptr %ary, i64 %n) "target-cpu"="znver3" {
 ; CHECK-NEXT:    [[DUMMY48_EPIL:%.*]] = mul i32 [[DUMMY47_EPIL]], [[DUMMY47_EPIL]]
 ; CHECK-NEXT:    [[DUMMY49_EPIL:%.*]] = mul i32 [[DUMMY48_EPIL]], [[DUMMY48_EPIL]]
 ; CHECK-NEXT:    [[DUMMY50_EPIL:%.*]] = mul i32 [[DUMMY49_EPIL]], [[DUMMY49_EPIL]]
-; CHECK-NEXT:    [[SUM_NEXT_EPIL:%.*]] = add nsw i32 [[DUMMY50_EPIL]], [[SUM_UNR]]
+; CHECK-NEXT:    [[SUM_NEXT_EPIL]] = add nsw i32 [[DUMMY50_EPIL]], [[SUM_EPIL]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
+; CHECK-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL]], label [[FOR_COND_CLEANUP_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       for.cond.cleanup.epilog-lcssa:
+; CHECK-NEXT:    [[SUM_NEXT_LCSSA_PH1:%.*]] = phi i32 [ [[SUM_NEXT_EPIL]], [[FOR_BODY_EPIL]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_LCSSA_PH]], [[FOR_COND_CLEANUP_UNR_LCSSA]] ], [ [[SUM_NEXT_EPIL]], [[FOR_BODY_EPIL]] ]
+; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_LCSSA_PH]], [[FOR_COND_CLEANUP_UNR_LCSSA]] ], [ [[SUM_NEXT_LCSSA_PH1]], [[FOR_COND_CLEANUP_EPILOG_LCSSA]] ]
 ; CHECK-NEXT:    ret i32 [[SUM_NEXT_LCSSA]]
 ;
 entry:


        


More information about the llvm-commits mailing list