[clang] [llvm] [SimplifyCFG] Not folding branch in loop header with constant iterations (PR #74268)

via cfe-commits cfe-commits at lists.llvm.org
Mon Dec 4 05:24:52 PST 2023


xiangzh1 wrote:

> Can you please share the IR before the unroll pass?

Sure:

**with this patch**:


  4523 ; *** IR Dump After LoopDeletionPass on for.body ***
 4524
 4525 ; Preheader:
 4526 entry:
 4527   br label %for.body
 4528
 4529 ; Loop:
 4530 for.body:                                         ; preds = %entry, %for.body7
 4531   %Dim.027 = phi i32 [ 0, %entry ], [ %inc16, %for.body7 ]
 4532   %cmp1 = icmp eq i32 %Dim.027, %Dims
 4533   br i1 %cmp1, label %cleanup, label %if.end
 4534
 4535 if.end:                                           ; preds = %for.body
 4536   %idxprom = zext nneg i32 %Dim.027 to i64
 4537   %arrayidx = getelementptr inbounds ptr, ptr %Arr, i64 %idxprom
 4538   br label %for.body7
 4539
 4540 for.body7:                                        ; preds = %if.end
 4541   %0 = load ptr, ptr %arrayidx, align 8, !tbaa !3
 4542   %1 = load i32, ptr %0, align 4, !tbaa !7
 4543   %2 = load i32, ptr %Out, align 4, !tbaa !7
 4544   %add14 = add nsw i32 %2, %1
 4545   store i32 %add14, ptr %Out, align 4, !tbaa !7
 4546   tail call void @_Z3barv() #2
 4547   %3 = load ptr, ptr %arrayidx, align 8, !tbaa !3
 4548   %arrayidx11.1 = getelementptr inbounds i32, ptr %3, i64 1
 4549   %4 = load i32, ptr %arrayidx11.1, align 4, !tbaa !7
 4550   %arrayidx13.1 = getelementptr inbounds i32, ptr %Out, i64 1
 4551   %5 = load i32, ptr %arrayidx13.1, align 4, !tbaa !7
 4552   %add14.1 = add nsw i32 %5, %4
 4553   store i32 %add14.1, ptr %arrayidx13.1, align 4, !tbaa !7
 4554   tail call void @_Z3barv() #2
 4555   %6 = load ptr, ptr %arrayidx, align 8, !tbaa !3
 4556   %arrayidx11.2 = getelementptr inbounds i32, ptr %6, i64 2
 4557   %7 = load i32, ptr %arrayidx11.2, align 4, !tbaa !7
 4558   %arrayidx13.2 = getelementptr inbounds i32, ptr %Out, i64 2
 4559   %8 = load i32, ptr %arrayidx13.2, align 4, !tbaa !7
 4560   %add14.2 = add nsw i32 %8, %7
 4561   store i32 %add14.2, ptr %arrayidx13.2, align 4, !tbaa !7
 4562   tail call void @_Z3barv() #2
 4563   %9 = load ptr, ptr %arrayidx, align 8, !tbaa !3
 4564   %arrayidx11.3 = getelementptr inbounds i32, ptr %9, i64 3
 4565   %10 = load i32, ptr %arrayidx11.3, align 4, !tbaa !7
 4566   %arrayidx13.3 = getelementptr inbounds i32, ptr %Out, i64 3
 4567   %11 = load i32, ptr %arrayidx13.3, align 4, !tbaa !7
 4568   %add14.3 = add nsw i32 %11, %10
 4569   store i32 %add14.3, ptr %arrayidx13.3, align 4, !tbaa !7
 4570   tail call void @_Z3barv() #2
 4571   %inc16 = add nuw nsw i32 %Dim.027, 1
 4572   %exitcond = icmp ne i32 %inc16, 16
 4573   br i1 %exitcond, label %for.body, label %cleanup, !llvm.loop !9
 4574
 4575 ; Exit blocks
 4576 cleanup:                                          ; preds = %for.body, %for.body7
 4577   ret void
 4578
 4579 cleanup:                                          ; preds = %for.body, %for.body7
 4580   ret void

 4581 ; *** IR Dump After LoopFullUnrollPass on for.body (invalidated) ***


**without this patch**:

3829 ; *** IR Dump After LoopDeletionPass on if.end ***
3830
3831 ; Preheader:
3832 if.end.preheader:                                 ; preds = %entry
3833   %0 = add i32 %Dims, -1
3834   %umin = call i32 @llvm.umin.i32(i32 %0, i32 15)
3835   %1 = add nuw nsw i32 %umin, 1
3836   %wide.trip.count = zext i32 %1 to i64
3837   br label %if.end
3838
3839 ; Loop:
3840 if.end:                                           ; preds = %if.end.preheader, %for.body7
3841   %indvars.iv = phi i64 [ 0, %if.end.preheader ], [ %indvars.iv.next, %for.body7 ]
3842   %arrayidx = getelementptr inbounds ptr, ptr %Arr, i64 %indvars.iv
3843   br label %for.body7
3844
3845 for.body7:                                        ; preds = %if.end
3846   %2 = load ptr, ptr %arrayidx, align 8, !tbaa !3
3847   %3 = load i32, ptr %2, align 4, !tbaa !7
3848   %4 = load i32, ptr %Out, align 4, !tbaa !7
3849   %add14 = add nsw i32 %4, %3
3850   store i32 %add14, ptr %Out, align 4, !tbaa !7
3851   tail call void @_Z3barv() #3
3852   %5 = load ptr, ptr %arrayidx, align 8, !tbaa !3
3853   %arrayidx11.1 = getelementptr inbounds i32, ptr %5, i64 1
3854   %6 = load i32, ptr %arrayidx11.1, align 4, !tbaa !7
3855   %arrayidx13.1 = getelementptr inbounds i32, ptr %Out, i64 1
3856   %7 = load i32, ptr %arrayidx13.1, align 4, !tbaa !7
3857   %add14.1 = add nsw i32 %7, %6
3858   store i32 %add14.1, ptr %arrayidx13.1, align 4, !tbaa !7
3859   tail call void @_Z3barv() #3
3860   %8 = load ptr, ptr %arrayidx, align 8, !tbaa !3
3861   %arrayidx11.2 = getelementptr inbounds i32, ptr %8, i64 2
3862   %9 = load i32, ptr %arrayidx11.2, align 4, !tbaa !7
3863   %arrayidx13.2 = getelementptr inbounds i32, ptr %Out, i64 2
3864   %10 = load i32, ptr %arrayidx13.2, align 4, !tbaa !7
3865   %add14.2 = add nsw i32 %10, %9
3866   store i32 %add14.2, ptr %arrayidx13.2, align 4, !tbaa !7
3867   tail call void @_Z3barv() #3
3868   %11 = load ptr, ptr %arrayidx, align 8, !tbaa !3
3869   %arrayidx11.3 = getelementptr inbounds i32, ptr %11, i64 3
3870   %12 = load i32, ptr %arrayidx11.3, align 4, !tbaa !7
3871   %arrayidx13.3 = getelementptr inbounds i32, ptr %Out, i64 3
3872   %13 = load i32, ptr %arrayidx13.3, align 4, !tbaa !7
3873   %add14.3 = add nsw i32 %13, %12
3874   store i32 %add14.3, ptr %arrayidx13.3, align 4, !tbaa !7
3875   tail call void @_Z3barv() #3
3876   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
3877   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
3878   br i1 %exitcond, label %cleanup.loopexit, label %if.end, !llvm.loop !9
3879
3880 ; Exit blocks
3881 cleanup.loopexit:                                 ; preds = %for.body7
3882   br label %cleanup
3883 ; *** IR Dump After LoopFullUnrollPass on if.end ***


https://github.com/llvm/llvm-project/pull/74268


More information about the cfe-commits mailing list