[llvm-branch-commits] [llvm] e963223 - [SimplifyCFG] Not folding branch in constant loops which expected unroll

Zhang Xiang via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Dec 4 00:34:27 PST 2023


Author: Zhang Xiang
Date: 2023-12-04T16:26:19+08:00
New Revision: e9632236b80710416dd1785cc7665b6e30dd28a5

URL: https://github.com/llvm/llvm-project/commit/e9632236b80710416dd1785cc7665b6e30dd28a5
DIFF: https://github.com/llvm/llvm-project/commit/e9632236b80710416dd1785cc7665b6e30dd28a5.diff

LOG: [SimplifyCFG] Not folding branch in constant loops which expected unroll

Loops with a constant iteration count and an unroll hint are usually
expected by consumers to be unrolled; folding branches in the header of
such a loop in SimplifyCFG breaks the unroll optimization.

Added: 
    

Modified: 
    clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
    llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
index b6502ce76c298..20506bc24806a 100644
--- a/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
+++ b/clang/test/CodeGenCUDA/simplify-cfg-unroll.cu
@@ -11,50 +11,505 @@ __device__ void bar();
 // CHECK-LABEL: define dso_local void @_Z4funciPPiiS_(
 // CHECK-SAME: i32 noundef [[IDX:%.*]], ptr nocapture noundef readonly [[ARR:%.*]], i32 noundef [[DIMS:%.*]], ptr nocapture noundef [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CMP127:%.*]] = icmp eq i32 [[DIMS]], 0
-// CHECK-NEXT:    br i1 [[CMP127]], label [[CLEANUP:%.*]], label [[IF_END_PREHEADER:%.*]]
-// CHECK:       if.end.preheader:
-// CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[DIMS]], -1
-// CHECK-NEXT:    [[UMIN:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 15)
-// CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i32 [[UMIN]], 1
-// CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[TMP1]] to i64
-// CHECK-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
-// CHECK-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
-// CHECK-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
-// CHECK-NEXT:    br label [[IF_END:%.*]]
+// CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[DIMS]], 0
+// CHECK-NEXT:    br i1 [[CMP1]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
 // CHECK:       if.end:
-// CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[IF_END_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END]] ]
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 [[INDVARS_IV]]
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3:![0-9]+]]
-// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP4]], [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR]], align 8, !tbaa [[TBAA3:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
 // CHECK-NEXT:    store i32 [[ADD14]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3:[0-9]+]]
-// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
-// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP7]], [[TMP6]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARR]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 1
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 1
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
 // CHECK-NEXT:    store i32 [[ADD14_1]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3]]
-// CHECK-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 2
-// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[ARR]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 2
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 2
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
 // CHECK-NEXT:    store i32 [[ADD14_2]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3]]
-// CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 3
-// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[ARR]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 3
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 3
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3:%.*]] = add nsw i32 [[TMP11]], [[TMP10]]
 // CHECK-NEXT:    store i32 [[ADD14_3]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
-// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR3]]
-// CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-// CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-// CHECK-NEXT:    br i1 [[EXITCOND]], label [[CLEANUP]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_1:%.*]] = icmp eq i32 [[DIMS]], 1
+// CHECK-NEXT:    br i1 [[CMP1_1]], label [[CLEANUP]], label [[IF_END_1:%.*]]
+// CHECK:       if.end.1:
+// CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 1
+// CHECK-NEXT:    [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_129:%.*]] = add nsw i32 [[TMP14]], [[TMP13]]
+// CHECK-NEXT:    store i32 [[ADD14_129]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_1:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 1
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX11_1_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_1:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
+// CHECK-NEXT:    store i32 [[ADD14_1_1]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_1:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 2
+// CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX11_2_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_1:%.*]] = add nsw i32 [[TMP20]], [[TMP19]]
+// CHECK-NEXT:    store i32 [[ADD14_2_1]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_1:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 3
+// CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11_3_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_1:%.*]] = add nsw i32 [[TMP23]], [[TMP22]]
+// CHECK-NEXT:    store i32 [[ADD14_3_1]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_2:%.*]] = icmp eq i32 [[DIMS]], 2
+// CHECK-NEXT:    br i1 [[CMP1_2]], label [[CLEANUP]], label [[IF_END_2:%.*]]
+// CHECK:       if.end.2:
+// CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 2
+// CHECK-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_230:%.*]] = add nsw i32 [[TMP26]], [[TMP25]]
+// CHECK-NEXT:    store i32 [[ADD14_230]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_2:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 1
+// CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX11_1_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_2:%.*]] = add nsw i32 [[TMP29]], [[TMP28]]
+// CHECK-NEXT:    store i32 [[ADD14_1_2]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_2:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 2
+// CHECK-NEXT:    [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX11_2_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_2:%.*]] = add nsw i32 [[TMP32]], [[TMP31]]
+// CHECK-NEXT:    store i32 [[ADD14_2_2]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_2:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 3
+// CHECK-NEXT:    [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX11_3_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_2:%.*]] = add nsw i32 [[TMP35]], [[TMP34]]
+// CHECK-NEXT:    store i32 [[ADD14_3_2]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_3:%.*]] = icmp eq i32 [[DIMS]], 3
+// CHECK-NEXT:    br i1 [[CMP1_3]], label [[CLEANUP]], label [[IF_END_3:%.*]]
+// CHECK:       if.end.3:
+// CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 3
+// CHECK-NEXT:    [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_331:%.*]] = add nsw i32 [[TMP38]], [[TMP37]]
+// CHECK-NEXT:    store i32 [[ADD14_331]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_3:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 1
+// CHECK-NEXT:    [[TMP40:%.*]] = load i32, ptr [[ARRAYIDX11_1_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP41:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_3:%.*]] = add nsw i32 [[TMP41]], [[TMP40]]
+// CHECK-NEXT:    store i32 [[ADD14_1_3]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP42:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_3:%.*]] = getelementptr inbounds i32, ptr [[TMP42]], i64 2
+// CHECK-NEXT:    [[TMP43:%.*]] = load i32, ptr [[ARRAYIDX11_2_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP44:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_3:%.*]] = add nsw i32 [[TMP44]], [[TMP43]]
+// CHECK-NEXT:    store i32 [[ADD14_2_3]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP45:%.*]] = load ptr, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_3:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i64 3
+// CHECK-NEXT:    [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX11_3_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP47:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_3:%.*]] = add nsw i32 [[TMP47]], [[TMP46]]
+// CHECK-NEXT:    store i32 [[ADD14_3_3]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_4:%.*]] = icmp eq i32 [[DIMS]], 4
+// CHECK-NEXT:    br i1 [[CMP1_4]], label [[CLEANUP]], label [[IF_END_4:%.*]]
+// CHECK:       if.end.4:
+// CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 4
+// CHECK-NEXT:    [[TMP48:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP50:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_4:%.*]] = add nsw i32 [[TMP50]], [[TMP49]]
+// CHECK-NEXT:    store i32 [[ADD14_4]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP51:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_4:%.*]] = getelementptr inbounds i32, ptr [[TMP51]], i64 1
+// CHECK-NEXT:    [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX11_1_4]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP53:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_4:%.*]] = add nsw i32 [[TMP53]], [[TMP52]]
+// CHECK-NEXT:    store i32 [[ADD14_1_4]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP54:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_4:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i64 2
+// CHECK-NEXT:    [[TMP55:%.*]] = load i32, ptr [[ARRAYIDX11_2_4]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP56:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_4:%.*]] = add nsw i32 [[TMP56]], [[TMP55]]
+// CHECK-NEXT:    store i32 [[ADD14_2_4]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP57:%.*]] = load ptr, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_4:%.*]] = getelementptr inbounds i32, ptr [[TMP57]], i64 3
+// CHECK-NEXT:    [[TMP58:%.*]] = load i32, ptr [[ARRAYIDX11_3_4]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP59:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_4:%.*]] = add nsw i32 [[TMP59]], [[TMP58]]
+// CHECK-NEXT:    store i32 [[ADD14_3_4]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_5:%.*]] = icmp eq i32 [[DIMS]], 5
+// CHECK-NEXT:    br i1 [[CMP1_5]], label [[CLEANUP]], label [[IF_END_5:%.*]]
+// CHECK:       if.end.5:
+// CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 5
+// CHECK-NEXT:    [[TMP60:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP62:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_5:%.*]] = add nsw i32 [[TMP62]], [[TMP61]]
+// CHECK-NEXT:    store i32 [[ADD14_5]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP63:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_5:%.*]] = getelementptr inbounds i32, ptr [[TMP63]], i64 1
+// CHECK-NEXT:    [[TMP64:%.*]] = load i32, ptr [[ARRAYIDX11_1_5]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP65:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_5:%.*]] = add nsw i32 [[TMP65]], [[TMP64]]
+// CHECK-NEXT:    store i32 [[ADD14_1_5]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP66:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_5:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i64 2
+// CHECK-NEXT:    [[TMP67:%.*]] = load i32, ptr [[ARRAYIDX11_2_5]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP68:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_5:%.*]] = add nsw i32 [[TMP68]], [[TMP67]]
+// CHECK-NEXT:    store i32 [[ADD14_2_5]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP69:%.*]] = load ptr, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_5:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 3
+// CHECK-NEXT:    [[TMP70:%.*]] = load i32, ptr [[ARRAYIDX11_3_5]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP71:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_5:%.*]] = add nsw i32 [[TMP71]], [[TMP70]]
+// CHECK-NEXT:    store i32 [[ADD14_3_5]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_6:%.*]] = icmp eq i32 [[DIMS]], 6
+// CHECK-NEXT:    br i1 [[CMP1_6]], label [[CLEANUP]], label [[IF_END_6:%.*]]
+// CHECK:       if.end.6:
+// CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 6
+// CHECK-NEXT:    [[TMP72:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP74:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_6:%.*]] = add nsw i32 [[TMP74]], [[TMP73]]
+// CHECK-NEXT:    store i32 [[ADD14_6]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP75:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_6:%.*]] = getelementptr inbounds i32, ptr [[TMP75]], i64 1
+// CHECK-NEXT:    [[TMP76:%.*]] = load i32, ptr [[ARRAYIDX11_1_6]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP77:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_6:%.*]] = add nsw i32 [[TMP77]], [[TMP76]]
+// CHECK-NEXT:    store i32 [[ADD14_1_6]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP78:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_6:%.*]] = getelementptr inbounds i32, ptr [[TMP78]], i64 2
+// CHECK-NEXT:    [[TMP79:%.*]] = load i32, ptr [[ARRAYIDX11_2_6]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP80:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_6:%.*]] = add nsw i32 [[TMP80]], [[TMP79]]
+// CHECK-NEXT:    store i32 [[ADD14_2_6]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP81:%.*]] = load ptr, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_6:%.*]] = getelementptr inbounds i32, ptr [[TMP81]], i64 3
+// CHECK-NEXT:    [[TMP82:%.*]] = load i32, ptr [[ARRAYIDX11_3_6]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP83:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_6:%.*]] = add nsw i32 [[TMP83]], [[TMP82]]
+// CHECK-NEXT:    store i32 [[ADD14_3_6]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_7:%.*]] = icmp eq i32 [[DIMS]], 7
+// CHECK-NEXT:    br i1 [[CMP1_7]], label [[CLEANUP]], label [[IF_END_7:%.*]]
+// CHECK:       if.end.7:
+// CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 7
+// CHECK-NEXT:    [[TMP84:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP86:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_7:%.*]] = add nsw i32 [[TMP86]], [[TMP85]]
+// CHECK-NEXT:    store i32 [[ADD14_7]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP87:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_7:%.*]] = getelementptr inbounds i32, ptr [[TMP87]], i64 1
+// CHECK-NEXT:    [[TMP88:%.*]] = load i32, ptr [[ARRAYIDX11_1_7]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP89:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_7:%.*]] = add nsw i32 [[TMP89]], [[TMP88]]
+// CHECK-NEXT:    store i32 [[ADD14_1_7]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP90:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_7:%.*]] = getelementptr inbounds i32, ptr [[TMP90]], i64 2
+// CHECK-NEXT:    [[TMP91:%.*]] = load i32, ptr [[ARRAYIDX11_2_7]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP92:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_7:%.*]] = add nsw i32 [[TMP92]], [[TMP91]]
+// CHECK-NEXT:    store i32 [[ADD14_2_7]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP93:%.*]] = load ptr, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_7:%.*]] = getelementptr inbounds i32, ptr [[TMP93]], i64 3
+// CHECK-NEXT:    [[TMP94:%.*]] = load i32, ptr [[ARRAYIDX11_3_7]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP95:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_7:%.*]] = add nsw i32 [[TMP95]], [[TMP94]]
+// CHECK-NEXT:    store i32 [[ADD14_3_7]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_8:%.*]] = icmp eq i32 [[DIMS]], 8
+// CHECK-NEXT:    br i1 [[CMP1_8]], label [[CLEANUP]], label [[IF_END_8:%.*]]
+// CHECK:       if.end.8:
+// CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 8
+// CHECK-NEXT:    [[TMP96:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP97:%.*]] = load i32, ptr [[TMP96]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP98:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_8:%.*]] = add nsw i32 [[TMP98]], [[TMP97]]
+// CHECK-NEXT:    store i32 [[ADD14_8]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP99:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_8:%.*]] = getelementptr inbounds i32, ptr [[TMP99]], i64 1
+// CHECK-NEXT:    [[TMP100:%.*]] = load i32, ptr [[ARRAYIDX11_1_8]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP101:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_8:%.*]] = add nsw i32 [[TMP101]], [[TMP100]]
+// CHECK-NEXT:    store i32 [[ADD14_1_8]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP102:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_8:%.*]] = getelementptr inbounds i32, ptr [[TMP102]], i64 2
+// CHECK-NEXT:    [[TMP103:%.*]] = load i32, ptr [[ARRAYIDX11_2_8]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP104:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_8:%.*]] = add nsw i32 [[TMP104]], [[TMP103]]
+// CHECK-NEXT:    store i32 [[ADD14_2_8]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP105:%.*]] = load ptr, ptr [[ARRAYIDX_8]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_8:%.*]] = getelementptr inbounds i32, ptr [[TMP105]], i64 3
+// CHECK-NEXT:    [[TMP106:%.*]] = load i32, ptr [[ARRAYIDX11_3_8]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP107:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_8:%.*]] = add nsw i32 [[TMP107]], [[TMP106]]
+// CHECK-NEXT:    store i32 [[ADD14_3_8]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_9:%.*]] = icmp eq i32 [[DIMS]], 9
+// CHECK-NEXT:    br i1 [[CMP1_9]], label [[CLEANUP]], label [[IF_END_9:%.*]]
+// CHECK:       if.end.9:
+// CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 9
+// CHECK-NEXT:    [[TMP108:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP109:%.*]] = load i32, ptr [[TMP108]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP110:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_9:%.*]] = add nsw i32 [[TMP110]], [[TMP109]]
+// CHECK-NEXT:    store i32 [[ADD14_9]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP111:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_9:%.*]] = getelementptr inbounds i32, ptr [[TMP111]], i64 1
+// CHECK-NEXT:    [[TMP112:%.*]] = load i32, ptr [[ARRAYIDX11_1_9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP113:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_9:%.*]] = add nsw i32 [[TMP113]], [[TMP112]]
+// CHECK-NEXT:    store i32 [[ADD14_1_9]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP114:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_9:%.*]] = getelementptr inbounds i32, ptr [[TMP114]], i64 2
+// CHECK-NEXT:    [[TMP115:%.*]] = load i32, ptr [[ARRAYIDX11_2_9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP116:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_9:%.*]] = add nsw i32 [[TMP116]], [[TMP115]]
+// CHECK-NEXT:    store i32 [[ADD14_2_9]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP117:%.*]] = load ptr, ptr [[ARRAYIDX_9]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_9:%.*]] = getelementptr inbounds i32, ptr [[TMP117]], i64 3
+// CHECK-NEXT:    [[TMP118:%.*]] = load i32, ptr [[ARRAYIDX11_3_9]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP119:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_9:%.*]] = add nsw i32 [[TMP119]], [[TMP118]]
+// CHECK-NEXT:    store i32 [[ADD14_3_9]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_10:%.*]] = icmp eq i32 [[DIMS]], 10
+// CHECK-NEXT:    br i1 [[CMP1_10]], label [[CLEANUP]], label [[IF_END_10:%.*]]
+// CHECK:       if.end.10:
+// CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 10
+// CHECK-NEXT:    [[TMP120:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP121:%.*]] = load i32, ptr [[TMP120]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP122:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_10:%.*]] = add nsw i32 [[TMP122]], [[TMP121]]
+// CHECK-NEXT:    store i32 [[ADD14_10]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP123:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_10:%.*]] = getelementptr inbounds i32, ptr [[TMP123]], i64 1
+// CHECK-NEXT:    [[TMP124:%.*]] = load i32, ptr [[ARRAYIDX11_1_10]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP125:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_10:%.*]] = add nsw i32 [[TMP125]], [[TMP124]]
+// CHECK-NEXT:    store i32 [[ADD14_1_10]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP126:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_10:%.*]] = getelementptr inbounds i32, ptr [[TMP126]], i64 2
+// CHECK-NEXT:    [[TMP127:%.*]] = load i32, ptr [[ARRAYIDX11_2_10]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP128:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_10:%.*]] = add nsw i32 [[TMP128]], [[TMP127]]
+// CHECK-NEXT:    store i32 [[ADD14_2_10]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP129:%.*]] = load ptr, ptr [[ARRAYIDX_10]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_10:%.*]] = getelementptr inbounds i32, ptr [[TMP129]], i64 3
+// CHECK-NEXT:    [[TMP130:%.*]] = load i32, ptr [[ARRAYIDX11_3_10]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP131:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_10:%.*]] = add nsw i32 [[TMP131]], [[TMP130]]
+// CHECK-NEXT:    store i32 [[ADD14_3_10]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_11:%.*]] = icmp eq i32 [[DIMS]], 11
+// CHECK-NEXT:    br i1 [[CMP1_11]], label [[CLEANUP]], label [[IF_END_11:%.*]]
+// CHECK:       if.end.11:
+// CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 11
+// CHECK-NEXT:    [[TMP132:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP133:%.*]] = load i32, ptr [[TMP132]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP134:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_11:%.*]] = add nsw i32 [[TMP134]], [[TMP133]]
+// CHECK-NEXT:    store i32 [[ADD14_11]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP135:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_11:%.*]] = getelementptr inbounds i32, ptr [[TMP135]], i64 1
+// CHECK-NEXT:    [[TMP136:%.*]] = load i32, ptr [[ARRAYIDX11_1_11]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP137:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_11:%.*]] = add nsw i32 [[TMP137]], [[TMP136]]
+// CHECK-NEXT:    store i32 [[ADD14_1_11]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP138:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_11:%.*]] = getelementptr inbounds i32, ptr [[TMP138]], i64 2
+// CHECK-NEXT:    [[TMP139:%.*]] = load i32, ptr [[ARRAYIDX11_2_11]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP140:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_11:%.*]] = add nsw i32 [[TMP140]], [[TMP139]]
+// CHECK-NEXT:    store i32 [[ADD14_2_11]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP141:%.*]] = load ptr, ptr [[ARRAYIDX_11]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_11:%.*]] = getelementptr inbounds i32, ptr [[TMP141]], i64 3
+// CHECK-NEXT:    [[TMP142:%.*]] = load i32, ptr [[ARRAYIDX11_3_11]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP143:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_11:%.*]] = add nsw i32 [[TMP143]], [[TMP142]]
+// CHECK-NEXT:    store i32 [[ADD14_3_11]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_12:%.*]] = icmp eq i32 [[DIMS]], 12
+// CHECK-NEXT:    br i1 [[CMP1_12]], label [[CLEANUP]], label [[IF_END_12:%.*]]
+// CHECK:       if.end.12:
+// CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 12
+// CHECK-NEXT:    [[TMP144:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP145:%.*]] = load i32, ptr [[TMP144]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP146:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_12:%.*]] = add nsw i32 [[TMP146]], [[TMP145]]
+// CHECK-NEXT:    store i32 [[ADD14_12]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP147:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_12:%.*]] = getelementptr inbounds i32, ptr [[TMP147]], i64 1
+// CHECK-NEXT:    [[TMP148:%.*]] = load i32, ptr [[ARRAYIDX11_1_12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP149:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_12:%.*]] = add nsw i32 [[TMP149]], [[TMP148]]
+// CHECK-NEXT:    store i32 [[ADD14_1_12]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP150:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_12:%.*]] = getelementptr inbounds i32, ptr [[TMP150]], i64 2
+// CHECK-NEXT:    [[TMP151:%.*]] = load i32, ptr [[ARRAYIDX11_2_12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP152:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_12:%.*]] = add nsw i32 [[TMP152]], [[TMP151]]
+// CHECK-NEXT:    store i32 [[ADD14_2_12]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP153:%.*]] = load ptr, ptr [[ARRAYIDX_12]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_12:%.*]] = getelementptr inbounds i32, ptr [[TMP153]], i64 3
+// CHECK-NEXT:    [[TMP154:%.*]] = load i32, ptr [[ARRAYIDX11_3_12]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP155:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_12:%.*]] = add nsw i32 [[TMP155]], [[TMP154]]
+// CHECK-NEXT:    store i32 [[ADD14_3_12]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_13:%.*]] = icmp eq i32 [[DIMS]], 13
+// CHECK-NEXT:    br i1 [[CMP1_13]], label [[CLEANUP]], label [[IF_END_13:%.*]]
+// CHECK:       if.end.13:
+// CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 13
+// CHECK-NEXT:    [[TMP156:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP157:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP158:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_13:%.*]] = add nsw i32 [[TMP158]], [[TMP157]]
+// CHECK-NEXT:    store i32 [[ADD14_13]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP159:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_13:%.*]] = getelementptr inbounds i32, ptr [[TMP159]], i64 1
+// CHECK-NEXT:    [[TMP160:%.*]] = load i32, ptr [[ARRAYIDX11_1_13]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP161:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_13:%.*]] = add nsw i32 [[TMP161]], [[TMP160]]
+// CHECK-NEXT:    store i32 [[ADD14_1_13]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP162:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_13:%.*]] = getelementptr inbounds i32, ptr [[TMP162]], i64 2
+// CHECK-NEXT:    [[TMP163:%.*]] = load i32, ptr [[ARRAYIDX11_2_13]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP164:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_13:%.*]] = add nsw i32 [[TMP164]], [[TMP163]]
+// CHECK-NEXT:    store i32 [[ADD14_2_13]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP165:%.*]] = load ptr, ptr [[ARRAYIDX_13]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_13:%.*]] = getelementptr inbounds i32, ptr [[TMP165]], i64 3
+// CHECK-NEXT:    [[TMP166:%.*]] = load i32, ptr [[ARRAYIDX11_3_13]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP167:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_13:%.*]] = add nsw i32 [[TMP167]], [[TMP166]]
+// CHECK-NEXT:    store i32 [[ADD14_3_13]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_14:%.*]] = icmp eq i32 [[DIMS]], 14
+// CHECK-NEXT:    br i1 [[CMP1_14]], label [[CLEANUP]], label [[IF_END_14:%.*]]
+// CHECK:       if.end.14:
+// CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 14
+// CHECK-NEXT:    [[TMP168:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP169:%.*]] = load i32, ptr [[TMP168]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP170:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_14:%.*]] = add nsw i32 [[TMP170]], [[TMP169]]
+// CHECK-NEXT:    store i32 [[ADD14_14]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP171:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_14:%.*]] = getelementptr inbounds i32, ptr [[TMP171]], i64 1
+// CHECK-NEXT:    [[TMP172:%.*]] = load i32, ptr [[ARRAYIDX11_1_14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP173:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_14:%.*]] = add nsw i32 [[TMP173]], [[TMP172]]
+// CHECK-NEXT:    store i32 [[ADD14_1_14]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP174:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_14:%.*]] = getelementptr inbounds i32, ptr [[TMP174]], i64 2
+// CHECK-NEXT:    [[TMP175:%.*]] = load i32, ptr [[ARRAYIDX11_2_14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP176:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_14:%.*]] = add nsw i32 [[TMP176]], [[TMP175]]
+// CHECK-NEXT:    store i32 [[ADD14_2_14]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP177:%.*]] = load ptr, ptr [[ARRAYIDX_14]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_14:%.*]] = getelementptr inbounds i32, ptr [[TMP177]], i64 3
+// CHECK-NEXT:    [[TMP178:%.*]] = load i32, ptr [[ARRAYIDX11_3_14]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP179:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_14:%.*]] = add nsw i32 [[TMP179]], [[TMP178]]
+// CHECK-NEXT:    store i32 [[ADD14_3_14]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[CMP1_15:%.*]] = icmp eq i32 [[DIMS]], 15
+// CHECK-NEXT:    br i1 [[CMP1_15]], label [[CLEANUP]], label [[IF_END_15:%.*]]
+// CHECK:       if.end.15:
+// CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds ptr, ptr [[ARR]], i64 15
+// CHECK-NEXT:    [[TMP180:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[TMP181:%.*]] = load i32, ptr [[TMP180]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP182:%.*]] = load i32, ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_15:%.*]] = add nsw i32 [[TMP182]], [[TMP181]]
+// CHECK-NEXT:    store i32 [[ADD14_15]], ptr [[OUT]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP183:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_1_15:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i64 1
+// CHECK-NEXT:    [[TMP184:%.*]] = load i32, ptr [[ARRAYIDX11_1_15]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP185:%.*]] = load i32, ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_1_15:%.*]] = add nsw i32 [[TMP185]], [[TMP184]]
+// CHECK-NEXT:    store i32 [[ADD14_1_15]], ptr [[ARRAYIDX13_1]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP186:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_2_15:%.*]] = getelementptr inbounds i32, ptr [[TMP186]], i64 2
+// CHECK-NEXT:    [[TMP187:%.*]] = load i32, ptr [[ARRAYIDX11_2_15]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP188:%.*]] = load i32, ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_2_15:%.*]] = add nsw i32 [[TMP188]], [[TMP187]]
+// CHECK-NEXT:    store i32 [[ADD14_2_15]], ptr [[ARRAYIDX13_2]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    [[TMP189:%.*]] = load ptr, ptr [[ARRAYIDX_15]], align 8, !tbaa [[TBAA3]]
+// CHECK-NEXT:    [[ARRAYIDX11_3_15:%.*]] = getelementptr inbounds i32, ptr [[TMP189]], i64 3
+// CHECK-NEXT:    [[TMP190:%.*]] = load i32, ptr [[ARRAYIDX11_3_15]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[TMP191:%.*]] = load i32, ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ADD14_3_15:%.*]] = add nsw i32 [[TMP191]], [[TMP190]]
+// CHECK-NEXT:    store i32 [[ADD14_3_15]], ptr [[ARRAYIDX13_3]], align 4, !tbaa [[TBAA7]]
+// CHECK-NEXT:    tail call void @_Z3barv() #[[ATTR2]]
+// CHECK-NEXT:    br label [[CLEANUP]]
 // CHECK:       cleanup:
 // CHECK-NEXT:    ret void
 //
@@ -80,7 +535,4 @@ __device__ void func(int Idx, int *Arr[], int Dims, int *Out) {
 // CHECK: [[META6]] = !{!"Simple C++ TBAA"}
 // CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
 // CHECK: [[META8]] = !{!"int", [[META5]], i64 0}
-// CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
-// CHECK: [[META10]] = !{!"llvm.loop.mustprogress"}
-// CHECK: [[META11]] = !{!"llvm.loop.unroll.enable"}
 //.

diff  --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c09cf9c2325c4..ca5a55d9cd17c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <cassert>
@@ -3634,6 +3635,59 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
   }
 }
 
+/// Return true if the !llvm.loop metadata attached to \p TI has an unroll hint.
+static bool hasUnrollHint(Instruction *TI) {
+  MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
+  if (!MD)
+    return false;
+  return GetUnrollMetadata(MD, "llvm.loop.unroll.enable") ||
+         GetUnrollMetadata(MD, "llvm.loop.unroll.full") ||
+         GetUnrollMetadata(MD, "llvm.loop.unroll.count");
+}
+
+// Return true if PBI looks like the exit test of a loop with a constant
+// iteration count that is expected to be unrolled, e.g.
+//   #pragma unroll
+//   for (int I = 0; I < ConstNum; ++I) { // ConstNum > 1
+//     if (Cond2)
+//       break;
+//     xxx loop body;
+//   }
+// Folding "I < ConstNum" together with "Cond2" may break/affect loop unroll,
+// so callers use this to skip folding such conditional branches.
+static bool isConstantLoopWithUnrollHint(BranchInst *PBI) {
+  ICmpInst *ICmp = dyn_cast<ICmpInst>(PBI->getCondition());
+  if (!ICmp)
+    return false;
+
+  // Require a constant operand greater than 1 (ConstNum > 1). The comparison
+  // goes through APInt because ConstantInt::getSExtValue() asserts when the
+  // constant is wider than 64 bits (e.g. an i128 compare).
+  bool HasConstGreaterThanOne = false;
+  for (Value *Operand : ICmp->operands()) {
+    auto *CI = dyn_cast<ConstantInt>(Operand);
+    if (CI && CI->getValue().sgt(1)) {
+      HasConstGreaterThanOne = true;
+      break;
+    }
+  }
+  if (!HasConstGreaterThanOne)
+    return false;
+
+  // Loop information has not been established yet, so a backedge is
+  // approximated as a predecessor of PBB that is laid out at or after PBB in
+  // the function (PBB itself is included).
+  BasicBlock *PBB = PBI->getParent();
+  for (Function::iterator I = PBB->getIterator(), E = PBB->getParent()->end();
+       I != E; ++I) {
+    BasicBlock *BB = &*I;
+    if (is_contained(predecessors(PBB), BB) &&
+        hasUnrollHint(BB->getTerminator()))
+      return true;
+  }
+  return false;
+}
+
 /// Determine if the two branches share a common destination and deduce a glue
 /// that joins the branches' conditions to arrive at the common destination if
 /// that would be profitable.
@@ -3645,6 +3699,9 @@ shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
   assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
          "PredBB must be a predecessor of BB.");
 
+  if (isConstantLoopWithUnrollHint(PBI))
+    return std::nullopt;
+
   // We have the potential to fold the conditions together, but if the
   // predecessor branch is predictable, we may not want to merge them.
   uint64_t PTWeight, PFWeight;


        


More information about the llvm-branch-commits mailing list