[llvm] b48450c - [MergeFuncs] Use sizeWithoutDebug to decide if we create a thunk (#68627)

David Blaikie via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 16 08:18:56 PDT 2023


On Wed, Oct 11, 2023 at 5:33 PM via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

>
> Author: Oskar Wirga
> Date: 2023-10-11T17:32:41-07:00
> New Revision: b48450c2094fa48d3d968536876fa684eb21f330
>
> URL:
> https://github.com/llvm/llvm-project/commit/b48450c2094fa48d3d968536876fa684eb21f330
> DIFF:
> https://github.com/llvm/llvm-project/commit/b48450c2094fa48d3d968536876fa684eb21f330.diff
>
> LOG: [MergeFuncs] Use sizeWithoutDebug to decide if we create a thunk
> (#68627)
>
> I noticed that when we determine the size of the function to figure out
> if it's profitable, we include debug instructions, which can end up making
> larger functions than necessary.
>
> Added:
>     llvm/test/Transforms/MergeFunc/no-merge-debug-thunks.ll
>
> Modified:
>     llvm/lib/Transforms/IPO/MergeFunctions.cpp
>     llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
>
> Removed:
>
>
>
>
> ################################################################################
> diff  --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
> b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
> index 89ddd7b6adebbec..c19cf71ba4c6af3 100644
> --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
> +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
> @@ -653,7 +653,7 @@ static bool canCreateThunkFor(Function *F) {
>    // Don't merge tiny functions using a thunk, since it can just end up
>    // making the function larger.
>    if (F->size() == 1) {
> -    if (F->front().size() <= 2) {
> +    if (F->front().sizeWithoutDebug() < 2) {
>

This would also change the size threshold for non-debug builds, though (the
comparison went from `<= 2` to `< 2`) - should it have been `<=` like the
old code?


>        LLVM_DEBUG(dbgs() << "canCreateThunkFor: " << F->getName()
>                          << " is too small to bother creating a thunk
> for\n");
>        return false;
>
> diff  --git
> a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
> b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
> index 13d94f8cf6a257c..72aa647ae0b2bdf 100644
> --- a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
> +++ b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll
> @@ -1,3 +1,4 @@
> +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
> UTC_ARGS: --version 2
>  ; RUN: opt -passes='default<O0>,mergefunc' -S
> -mergefunc-preserve-debug-info < %s | FileCheck %s
> --check-prefix=OPTIMIZATION_LEVEL_0
>  ; RUN: opt -passes='default<O2>,mergefunc' -S
> -mergefunc-preserve-debug-info < %s | FileCheck %s
> --check-prefix=OPTIMIZATION_LEVEL_2
>
> @@ -43,6 +44,48 @@
>
>  ; Function Attrs: nounwind uwtable
>  define i32 @maxA(i32 %x, i32 %y) !dbg !6 {
> +; OPTIMIZATION_LEVEL_0-LABEL: define i32 @maxA
> +; OPTIMIZATION_LEVEL_0-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) !dbg
> [[DBG6:![0-9]+]] {
> +; OPTIMIZATION_LEVEL_0-NEXT:  entry:
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[Y_ADDR:%.*]] = alloca i32, align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[I:%.*]] = alloca i32, align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[M:%.*]] = alloca i32, align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[J:%.*]] = alloca i32, align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    call void @llvm.dbg.declare(metadata ptr
> [[X_ADDR]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG12:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    store i32 [[Y]], ptr [[Y_ADDR]], align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    call void @llvm.dbg.declare(metadata ptr
> [[Y_ADDR]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG14:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    call void @llvm.dbg.declare(metadata ptr
> [[I]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG16:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    call void @llvm.dbg.declare(metadata ptr
> [[M]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG18:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    call void @llvm.dbg.declare(metadata ptr
> [[J]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG20:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]],
> align 4, !dbg [[DBG21:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]],
> align 4, !dbg [[DBG23:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]],
> [[TMP1]], !dbg [[DBG24:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]],
> label [[IF_ELSE:%.*]], !dbg [[DBG25:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0:       if.then:
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]],
> align 4, !dbg [[DBG26:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    store i32 [[TMP2]], ptr [[M]], align 4,
> !dbg [[DBG27:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    br label [[IF_END:%.*]], !dbg
> [[DBG28:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0:       if.else:
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[TMP3:%.*]] = load i32, ptr [[Y_ADDR]],
> align 4, !dbg [[DBG29:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    store i32 [[TMP3]], ptr [[M]], align 4,
> !dbg [[DBG30:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    br label [[IF_END]]
> +; OPTIMIZATION_LEVEL_0:       if.end:
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[TMP4:%.*]] = load i32, ptr [[M]], align
> 4, !dbg [[DBG31:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    ret i32 [[TMP4]], !dbg [[DBG32:![0-9]+]]
> +;
> +; OPTIMIZATION_LEVEL_2-LABEL: define i32 @maxA
> +; OPTIMIZATION_LEVEL_2-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]])
> local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
> +; OPTIMIZATION_LEVEL_2-NEXT:  entry:
> +; OPTIMIZATION_LEVEL_2-NEXT:    call void @llvm.dbg.value(metadata i32
> [[X]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG12:![0-9]+]]
> +; OPTIMIZATION_LEVEL_2-NEXT:    call void @llvm.dbg.value(metadata i32
> [[Y]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG12]]
> +; OPTIMIZATION_LEVEL_2-NEXT:    call void @llvm.dbg.declare(metadata ptr
> undef, metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG15:![0-9]+]]
> +; OPTIMIZATION_LEVEL_2-NEXT:    call void @llvm.dbg.declare(metadata ptr
> undef, metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG17:![0-9]+]]
> +; OPTIMIZATION_LEVEL_2-NEXT:    [[X_Y:%.*]] = tail call i32
> @llvm.smax.i32(i32 [[X]], i32 [[Y]])
> +; OPTIMIZATION_LEVEL_2-NEXT:    call void @llvm.dbg.value(metadata i32
> [[X_Y]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG12]]
> +; OPTIMIZATION_LEVEL_2-NEXT:    ret i32 [[X_Y]], !dbg [[DBG19:![0-9]+]]
> +;
>  entry:
>    %x.addr = alloca i32, align 4
>    %y.addr = alloca i32, align 4
> @@ -81,26 +124,27 @@ declare void @llvm.dbg.declare(metadata, metadata,
> metadata)
>
>  ; Function Attrs: nounwind uwtable
>  define i32 @maxB(i32 %x, i32 %y) !dbg !34 {
> +; OPTIMIZATION_LEVEL_0-LABEL: define i32 @maxB
> +; OPTIMIZATION_LEVEL_0-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) !dbg
> [[DBG33:![0-9]+]] {
> +; OPTIMIZATION_LEVEL_0-NEXT:  entry:
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[Y_ADDR:%.*]] = alloca i32, align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    call void @llvm.dbg.declare(metadata ptr
> [[X_ADDR]], metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG35:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    store i32 [[Y]], ptr [[Y_ADDR]], align 4
> +; OPTIMIZATION_LEVEL_0-NEXT:    call void @llvm.dbg.declare(metadata ptr
> [[Y_ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG37:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[TMP0:%.*]] = tail call i32 @maxA(i32
> [[X]], i32 [[Y]]), !dbg [[DBG38:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    ret i32 [[TMP0]], !dbg [[DBG38]]
> +;
> +; OPTIMIZATION_LEVEL_2-LABEL: define i32 @maxB
> +; OPTIMIZATION_LEVEL_2-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]])
> local_unnamed_addr #[[ATTR0]] !dbg [[DBG20:![0-9]+]] {
> +; OPTIMIZATION_LEVEL_2-NEXT:  entry:
> +; OPTIMIZATION_LEVEL_2-NEXT:    call void @llvm.dbg.value(metadata i32
> [[X]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG22:![0-9]+]]
> +; OPTIMIZATION_LEVEL_2-NEXT:    call void @llvm.dbg.value(metadata i32
> [[Y]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg
> [[DBG22]]
> +; OPTIMIZATION_LEVEL_2-NEXT:    [[TMP0:%.*]] = tail call i32 @maxA(i32
> [[X]], i32 [[Y]]) #[[ATTR0]], !dbg [[DBG24:![0-9]+]]
> +; OPTIMIZATION_LEVEL_2-NEXT:    ret i32 [[TMP0]], !dbg [[DBG24]]
> +;
>
> -; OPTIMIZATION_LEVEL_0: define i32 @maxB(i32 %x, i32 %y)
> -; OPTIMIZATION_LEVEL_0-NEXT: entry:
> -; OPTIMIZATION_LEVEL_0-NEXT: %x.addr = alloca i32, align 4
> -; OPTIMIZATION_LEVEL_0-NEXT: %y.addr = alloca i32, align 4
> -; OPTIMIZATION_LEVEL_0-NEXT: store i32 %x, ptr %x.addr, align 4
> -; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr
> %x.addr, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_0-NEXT: store i32 %y, ptr %y.addr, align 4
> -; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr
> %y.addr, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_0-NEXT: %0 = tail call i32 @maxA(i32 %x, i32 %y),
> !dbg !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_0-NEXT: ret i32 %0, !dbg !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_0-NEXT: }
> -
> -; OPTIMIZATION_LEVEL_2: define i32 @maxB(i32 %x, i32 %y)
> -; OPTIMIZATION_LEVEL_2-NEXT: entry:
> -; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %x,
> metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 %y,
> metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_2-NEXT: %0 = tail call i32 @maxA(i32 %x, i32 %y)
> #{{[0-9]+}}, !dbg !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_2-NEXT: ret i32 %0, !dbg !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_2-NEXT: }
>
>  entry:
>    %x.addr = alloca i32, align 4
> @@ -137,18 +181,19 @@ if.end:                                           ;
> preds = %if.else, %if.then
>
>  ; Function Attrs: nounwind uwtable
>  define void @f() !dbg !57 {
> +; OPTIMIZATION_LEVEL_0-LABEL: define void @f
> +; OPTIMIZATION_LEVEL_0-SAME: () !dbg [[DBG39:![0-9]+]] {
> +; OPTIMIZATION_LEVEL_0-NEXT:  entry:
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[CALL:%.*]] = call i32 @maxA(i32 3, i32
> 4), !dbg [[DBG42:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    [[CALL1:%.*]] = call i32 @maxB(i32 1, i32
> 9), !dbg [[DBG43:![0-9]+]]
> +; OPTIMIZATION_LEVEL_0-NEXT:    ret void, !dbg [[DBG44:![0-9]+]]
> +;
> +; OPTIMIZATION_LEVEL_2-LABEL: define void @f
> +; OPTIMIZATION_LEVEL_2-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] !dbg
> [[DBG25:![0-9]+]] {
> +; OPTIMIZATION_LEVEL_2-NEXT:  entry:
> +; OPTIMIZATION_LEVEL_2-NEXT:    ret void, !dbg [[DBG28:![0-9]+]]
> +;
>  entry:
> -
> -; OPTIMIZATION_LEVEL_0: define void @f()
> -; OPTIMIZATION_LEVEL_0-NEXT: entry:
> -; OPTIMIZATION_LEVEL_0-NEXT: %call = call i32 @maxA(i32 3, i32 4), !dbg
> !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_0-NEXT: %call1 = call i32 @maxB(i32 1, i32 9), !dbg
> !{{[0-9]+}}
> -; OPTIMIZATION_LEVEL_0-NEXT: ret void, !dbg !{{[0-9]+}}
> -
> -; OPTIMIZATION_LEVEL_2: define void @f()
> -; OPTIMIZATION_LEVEL_2-NEXT: entry:
> -; OPTIMIZATION_LEVEL_2-NEXT: ret void, !dbg !{{[0-9]+}}
> -
>    %call = call i32 @maxA(i32 3, i32 4), !dbg !60
>    %call1 = call i32 @maxB(i32 1, i32 9), !dbg !61
>    ret void, !dbg !62
>
> diff  --git a/llvm/test/Transforms/MergeFunc/no-merge-debug-thunks.ll
> b/llvm/test/Transforms/MergeFunc/no-merge-debug-thunks.ll
> new file mode 100644
> index 000000000000000..80467c7aee407f8
> --- /dev/null
> +++ b/llvm/test/Transforms/MergeFunc/no-merge-debug-thunks.ll
> @@ -0,0 +1,61 @@
> +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
> UTC_ARGS: --version 2
> +;; Make sure debug instructions are not counted when deciding to merge
> functions
> +; RUN: opt -S -passes=mergefunc < %s | FileCheck %s
> +
> +target datalayout =
> "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
> +
> +; Function Attrs: nounwind readnone
> +define hidden i32 @f(i32 %t) {
> +; CHECK-LABEL: define hidden i32 @f
> +; CHECK-SAME: (i32 [[T:%.*]]) {
> +; CHECK-NEXT:  entry:
> +; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[T]], metadata
> [[META6:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]]
> +; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[T]], metadata
> [[META6]], metadata !DIExpression()), !dbg [[DBG12]]
> +; CHECK-NEXT:    ret i32 0
> +;
> +entry:
> +  call void @llvm.dbg.value(metadata i32 %t, metadata !12, metadata
> !DIExpression()), !dbg !13
> +  call void @llvm.dbg.value(metadata i32 %t, metadata !12, metadata
> !DIExpression()), !dbg !13
> +  ret i32 0
> +}
> +
> +; Function Attrs: nounwind readnone
> +define hidden i32 @f_thunk(i32 %t) {
> +; CHECK-LABEL: define hidden i32 @f_thunk
> +; CHECK-SAME: (i32 [[T:%.*]]) {
> +; CHECK-NEXT:  entry:
> +; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[T]], metadata
> [[META6]], metadata !DIExpression()), !dbg [[DBG12]]
> +; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[T]], metadata
> [[META6]], metadata !DIExpression()), !dbg [[DBG12]]
> +; CHECK-NEXT:    ret i32 0
> +;
> +entry:
> +  call void @llvm.dbg.value(metadata i32 %t, metadata !12, metadata
> !DIExpression()), !dbg !13
> +  call void @llvm.dbg.value(metadata i32 %t, metadata !12, metadata
> !DIExpression()), !dbg !13
> +  ret i32 0
> +}
> +
> +; Function Attrs: nounwind readnone speculatable
> +declare void @llvm.dbg.value(metadata, metadata, metadata)
> +
> +!llvm.dbg.cu = !{!0}
> +!llvm.module.flags = !{!3, !4, !5}
> +
> +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1,
> isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
> +!1 = !DIFile(filename: "no-merge-debug-thunks.c", directory: "/tmp")
> +!2 = !{}
> +!3 = !{i32 2, !"Dwarf Version", i32 4}
> +!4 = !{i32 2, !"Debug Info Version", i32 3}
> +!5 = !{i32 1, !"wchar_size", i32 4}
> +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3,
> type: !8, isLocal: false, isDefinition: true, scopeLine: 3, flags:
> DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !11)
> +!8 = !DISubroutineType(types: !9)
> +!9 = !{!10, !10}
> +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
> +!11 = !{!12}
> +!12 = !DILocalVariable(name: "t", arg: 1, scope: !7, file: !1, line: 3,
> type: !10)
> +!13 = !DILocation(line: 3, column: 14, scope: !7)
> +!14 = !DILocation(line: 4, column: 12, scope: !7)
> +!16 = distinct !DISubprogram(name: "_start", scope: !1, file: !1, line:
> 7, type: !17, isLocal: false, isDefinition: true, scopeLine: 7,
> isOptimized: true, unit: !0, retainedNodes: !2)
> +!17 = !DISubroutineType(types: !18)
> +!18 = !{!10}
> +!19 = !DILocation(line: 8, column: 3, scope: !16)
> +!20 = !DILocation(line: 9, column: 3, scope: !16)
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20231016/c3ab0c5b/attachment.html>


More information about the llvm-commits mailing list