[llvm] [BOLT] Extract call continuation traces from pre-aggregated profile (PR #109486)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 27 11:44:01 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/109486

>From e713939a87239ea57c15849dc0f9a1fcc49e73fb Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Fri, 20 Sep 2024 15:04:50 -0700
Subject: [PATCH 1/8] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 bolt/test/X86/Inputs/callcont-fallthru.preagg |  21 +
 bolt/test/X86/Inputs/callcont-fallthru.yaml   | 889 ++++++++++++++++++
 bolt/test/X86/callcont-fallthru.test          |   9 +
 3 files changed, 919 insertions(+)
 create mode 100644 bolt/test/X86/Inputs/callcont-fallthru.preagg
 create mode 100644 bolt/test/X86/Inputs/callcont-fallthru.yaml
 create mode 100644 bolt/test/X86/callcont-fallthru.test

diff --git a/bolt/test/X86/Inputs/callcont-fallthru.preagg b/bolt/test/X86/Inputs/callcont-fallthru.preagg
new file mode 100644
index 00000000000000..0b5f344540573a
--- /dev/null
+++ b/bolt/test/X86/Inputs/callcont-fallthru.preagg
@@ -0,0 +1,21 @@
+B ffffffff81e01006 401194 8 0
+B 401180 401199 98482 96
+B 401199 401166 99542 0
+B 401177 401130 102776 0
+B 401135 40117c 103204 0
+B 401186 40118b 1022983 0
+B 401194 40117c 1021645 1
+F 40117c 401135 1161
+F 40117c 401180 92267
+F 40118b 401194 991002
+F 40117c 401186 968072
+F 40118b 401186 11468
+F 401130 401135 100015
+F 401166 401177 96992
+F 401199 401199 96168
+F 40117c ffffffff81e01006 7
+F 401199 401180 1140
+F 401194 ffffffff81e01006 1
+F 40117c 401194 11522
+F 401166 401199 1151
+F 401130 401177 1154
diff --git a/bolt/test/X86/Inputs/callcont-fallthru.yaml b/bolt/test/X86/Inputs/callcont-fallthru.yaml
new file mode 100644
index 00000000000000..a1f8417d1e217d
--- /dev/null
+++ b/bolt/test/X86/Inputs/callcont-fallthru.yaml
@@ -0,0 +1,889 @@
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+  Entry:           0x401040
+ProgramHeaders:
+  - Type:            PT_PHDR
+    Flags:           [ PF_R ]
+    VAddr:           0x400040
+    Align:           0x8
+  - Type:            PT_INTERP
+    Flags:           [ PF_R ]
+    FirstSec:        .interp
+    LastSec:         .interp
+    VAddr:           0x400318
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .interp
+    LastSec:         .rela.plt
+    VAddr:           0x400000
+    Align:           0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .init
+    LastSec:         .fini
+    VAddr:           0x401000
+    Align:           0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_R ]
+    FirstSec:        .rodata
+    LastSec:         .eh_frame
+    VAddr:           0x402000
+    Align:           0x1000
+  - Type:            PT_LOAD
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .bss
+    VAddr:           0x403DE8
+    Align:           0x1000
+  - Type:            PT_DYNAMIC
+    Flags:           [ PF_W, PF_R ]
+    FirstSec:        .dynamic
+    LastSec:         .dynamic
+    VAddr:           0x403DF8
+    Align:           0x8
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x400338
+    Align:           0x8
+  - Type:            PT_NOTE
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.build-id
+    LastSec:         .note.ABI-tag
+    VAddr:           0x400358
+    Align:           0x4
+  - Type:            PT_GNU_PROPERTY
+    Flags:           [ PF_R ]
+    FirstSec:        .note.gnu.property
+    LastSec:         .note.gnu.property
+    VAddr:           0x400338
+    Align:           0x8
+  - Type:            PT_GNU_EH_FRAME
+    Flags:           [ PF_R ]
+    FirstSec:        .eh_frame_hdr
+    LastSec:         .eh_frame_hdr
+    VAddr:           0x402010
+    Align:           0x4
+  - Type:            PT_GNU_STACK
+    Flags:           [ PF_W, PF_R ]
+    Align:           0x10
+  - Type:            PT_GNU_RELRO
+    Flags:           [ PF_R ]
+    FirstSec:        .init_array
+    LastSec:         .got
+    VAddr:           0x403DE8
+Sections:
+  - Name:            .interp
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400318
+    AddressAlign:    0x1
+    Content:         2F6C696236342F6C642D6C696E75782D7838362D36342E736F2E3200
+  - Name:            .note.gnu.property
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400338
+    AddressAlign:    0x8
+    Notes:
+      - Name:            GNU
+        Desc:            028000C0040000000300000000000000
+        Type:            NT_GNU_PROPERTY_TYPE_0
+  - Name:            .note.gnu.build-id
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400358
+    AddressAlign:    0x4
+    Notes:
+      - Name:            GNU
+        Desc:            A77EA471B9AAA21E180E5FD02A0A0B2E4AB643E9
+        Type:            NT_PRPSINFO
+  - Name:            .note.ABI-tag
+    Type:            SHT_NOTE
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x40037C
+    AddressAlign:    0x4
+    Notes:
+      - Name:            GNU
+        Desc:            '00000000030000000200000000000000'
+        Type:            NT_VERSION
+  - Name:            .gnu.hash
+    Type:            SHT_GNU_HASH
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x4003A0
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Header:
+      SymNdx:          0x1
+      Shift2:          0x0
+    BloomFilter:     [ 0x0 ]
+    HashBuckets:     [ 0x0 ]
+    HashValues:      [  ]
+  - Name:            .dynsym
+    Type:            SHT_DYNSYM
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x4003C0
+    Link:            .dynstr
+    AddressAlign:    0x8
+  - Name:            .dynstr
+    Type:            SHT_STRTAB
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400450
+    AddressAlign:    0x1
+  - Name:            .gnu.version
+    Type:            SHT_GNU_versym
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x4004CE
+    Link:            .dynsym
+    AddressAlign:    0x2
+    Entries:         [ 0, 2, 1, 1, 3, 1 ]
+  - Name:            .gnu.version_r
+    Type:            SHT_GNU_verneed
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x4004E0
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Dependencies:
+      - Version:         1
+        File:            libc.so.6
+        Entries:
+          - Name:            GLIBC_2.2.5
+            Hash:            157882997
+            Flags:           0
+            Other:           3
+          - Name:            GLIBC_2.34
+            Hash:            110530996
+            Flags:           0
+            Other:           2
+  - Name:            .rela.dyn
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x400510
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Relocations:
+      - Offset:          0x403FC8
+        Symbol:          __libc_start_main
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x403FD0
+        Symbol:          _ITM_deregisterTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x403FD8
+        Symbol:          __gmon_start__
+        Type:            R_X86_64_GLOB_DAT
+      - Offset:          0x403FE0
+        Symbol:          _ITM_registerTMCloneTable
+        Type:            R_X86_64_GLOB_DAT
+  - Name:            .rela.plt
+    Type:            SHT_RELA
+    Flags:           [ SHF_ALLOC, SHF_INFO_LINK ]
+    Address:         0x400570
+    Link:            .dynsym
+    AddressAlign:    0x8
+    Info:            .got.plt
+    Relocations:
+      - Offset:          0x404000
+        Symbol:          atoi
+        Type:            R_X86_64_JUMP_SLOT
+  - Name:            .init
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x401000
+    AddressAlign:    0x4
+    Offset:          0x1000
+    Content:         F30F1EFA4883EC08488B05C92F00004885C07402FFD04883C408C3
+  - Name:            .plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x401020
+    AddressAlign:    0x10
+    EntSize:         0x10
+    Content:         FF35CA2F0000FF25CC2F00000F1F4000FF25CA2F00006800000000E9E0FFFFFF
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x401040
+    AddressAlign:    0x10
+    Content:         F30F1EFA31ED4989D15E4889E24883E4F050544531C031C948C7C740114000FF15632F0000F4662E0F1F840000000000F30F1EFAC3662E0F1F84000000000090488D3D892F0000488D05822F00004839F87415488B05362F00004885C07409FFE00F1F8000000000C30F1F8000000000488D3D592F0000488D35522F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B05052F00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803D152F0000007513554889E5E87AFFFFFFC605032F0000015DC390C366662E0F1F8400000000000F1F4000F30F1EFAEB8A662E0F1F840000000000554889E55DC3662E0F1F840000000000554889E54883EC20C745FC00000000897DF8488975F0488B45F0488B7808E8CDFEFFFF8945EC837DEC000F842E000000C745E80A000000E8B4FFFFFF837DE8000F8413000000E9000000008B45E883C0FF8945E8E9E3FFFFFFE9C8FFFFFF31C04883C4205DC3
+  - Name:            .fini
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x4011A8
+    AddressAlign:    0x4
+    Content:         F30F1EFA4883EC084883C408C3
+  - Name:            .rodata
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x402000
+    AddressAlign:    0x8
+    Offset:          0x2000
+    Content:         '01000200000000000000000000000000'
+  - Name:            .eh_frame_hdr
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x402010
+    AddressAlign:    0x4
+    Content:         011B033B340000000500000010F0FFFF7800000030F0FFFF5000000060F0FFFF6400000020F1FFFFA000000030F1FFFFC0000000
+  - Name:            .eh_frame
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x402048
+    AddressAlign:    0x8
+    Content:         1400000000000000017A5200017810011B0C070890010000100000001C000000D8EFFFFF26000000004407101000000030000000F4EFFFFF0500000000000000240000004400000090EFFFFF20000000000E10460E184A0F0B770880003F1A3B2A332422000000001C0000006C00000078F0FFFF0600000000410E108602430D06410C07080000001C0000008C00000068F0FFFF6600000000410E108602430D0602610C0708000000000000
+  - Name:            .init_array
+    Type:            SHT_INIT_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x403DE8
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Offset:          0x2DE8
+    Content:         '2011400000000000'
+  - Name:            .fini_array
+    Type:            SHT_FINI_ARRAY
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x403DF0
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         F010400000000000
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x403DF8
+    Link:            .dynstr
+    AddressAlign:    0x8
+    Entries:
+      - Tag:             DT_NEEDED
+        Value:           0x18
+      - Tag:             DT_INIT
+        Value:           0x401000
+      - Tag:             DT_FINI
+        Value:           0x4011A8
+      - Tag:             DT_INIT_ARRAY
+        Value:           0x403DE8
+      - Tag:             DT_INIT_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_FINI_ARRAY
+        Value:           0x403DF0
+      - Tag:             DT_FINI_ARRAYSZ
+        Value:           0x8
+      - Tag:             DT_GNU_HASH
+        Value:           0x4003A0
+      - Tag:             DT_STRTAB
+        Value:           0x400450
+      - Tag:             DT_SYMTAB
+        Value:           0x4003C0
+      - Tag:             DT_STRSZ
+        Value:           0x7E
+      - Tag:             DT_SYMENT
+        Value:           0x18
+      - Tag:             DT_DEBUG
+        Value:           0x0
+      - Tag:             DT_PLTGOT
+        Value:           0x403FE8
+      - Tag:             DT_PLTRELSZ
+        Value:           0x18
+      - Tag:             DT_PLTREL
+        Value:           0x7
+      - Tag:             DT_JMPREL
+        Value:           0x400570
+      - Tag:             DT_RELA
+        Value:           0x400510
+      - Tag:             DT_RELASZ
+        Value:           0x60
+      - Tag:             DT_RELAENT
+        Value:           0x18
+      - Tag:             DT_VERNEED
+        Value:           0x4004E0
+      - Tag:             DT_VERNEEDNUM
+        Value:           0x1
+      - Tag:             DT_VERSYM
+        Value:           0x4004CE
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+      - Tag:             DT_NULL
+        Value:           0x0
+  - Name:            .got
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x403FC8
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         '0000000000000000000000000000000000000000000000000000000000000000'
+  - Name:            .got.plt
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x403FE8
+    AddressAlign:    0x8
+    EntSize:         0x8
+    Content:         F83D400000000000000000000000000000000000000000003610400000000000
+  - Name:            .data
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x404008
+    AddressAlign:    0x1
+    Content:         '00000000'
+  - Name:            .tm_clone_table
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x404010
+    AddressAlign:    0x8
+  - Name:            .bss
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_WRITE, SHF_ALLOC ]
+    Address:         0x404010
+    AddressAlign:    0x1
+    Size:            0x8
+  - Name:            .comment
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         4743433A2028474E55292031312E352E302032303234303731392028526564204861742031312E352E302D3229004743433A2028474E55292031332E332E312032303234303631312028526564204861742031332E332E312D322900636C616E672076657273696F6E2031382E312E38202843656E744F532031382E312E382D332E656C392900
+  - Name:            .gnu.build.attributes
+    Type:            SHT_NOTE
+    Address:         0x406018
+    AddressAlign:    0x4
+    Notes:
+      - Name:            "GA$\x013a1"
+        Desc:            '40104000000000006610400000000000'
+        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
+      - Name:            "GA$\x013a1"
+        Desc:            '75104000000000007510400000000000'
+        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
+      - Name:            "GA$\x013a1"
+        Desc:            '00104000000000001610400000000000'
+        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
+      - Name:            "GA$\x013a1"
+        Desc:            A811400000000000B011400000000000
+        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
+      - Name:            "GA$\x013a1"
+        Desc:            '80104000000000002611400000000000'
+        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
+      - Name:            "GA$\x013a1"
+        Desc:            A611400000000000A611400000000000
+        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
+      - Name:            "GA$\x013a1"
+        Desc:            A611400000000000A611400000000000
+        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
+      - Name:            "GA$\x013a1"
+        Desc:            16104000000000001B10400000000000
+        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
+      - Name:            "GA$\x013a1"
+        Desc:            B011400000000000B511400000000000
+        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
+  - Name:            .rela.init
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .init
+    Relocations:
+      - Offset:          0x40100B
+        Symbol:          __gmon_start__
+        Type:            R_X86_64_REX_GOTPCRELX
+        Addend:          -4
+  - Name:            .rela.text
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .text
+    Relocations:
+      - Offset:          0x40105B
+        Symbol:          main
+        Type:            R_X86_64_32S
+      - Offset:          0x401061
+        Symbol:          '__libc_start_main at GLIBC_2.34'
+        Type:            R_X86_64_GOTPCRELX
+        Addend:          -4
+      - Offset:          0x401083
+        Symbol:          .tm_clone_table
+        Type:            R_X86_64_PC32
+        Addend:          -4
+      - Offset:          0x40108A
+        Symbol:          __TMC_END__
+        Type:            R_X86_64_PC32
+        Addend:          -4
+      - Offset:          0x401096
+        Symbol:          _ITM_deregisterTMCloneTable
+        Type:            R_X86_64_REX_GOTPCRELX
+        Addend:          -4
+      - Offset:          0x4010B3
+        Symbol:          .tm_clone_table
+        Type:            R_X86_64_PC32
+        Addend:          -4
+      - Offset:          0x4010BA
+        Symbol:          __TMC_END__
+        Type:            R_X86_64_PC32
+        Addend:          -4
+      - Offset:          0x4010D7
+        Symbol:          _ITM_registerTMCloneTable
+        Type:            R_X86_64_REX_GOTPCRELX
+        Addend:          -4
+      - Offset:          0x4010F6
+        Symbol:          .bss
+        Type:            R_X86_64_PC32
+        Addend:          -5
+      - Offset:          0x401108
+        Symbol:          .bss
+        Type:            R_X86_64_PC32
+        Addend:          -5
+      - Offset:          0x40115F
+        Symbol:          'atoi at GLIBC_2.2.5'
+        Type:            R_X86_64_PLT32
+        Addend:          -4
+      - Offset:          0x401178
+        Symbol:          foo
+        Type:            R_X86_64_PLT32
+        Addend:          -4
+  - Name:            .rela.eh_frame
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .eh_frame
+    Relocations:
+      - Offset:          0x402068
+        Symbol:          .text
+        Type:            R_X86_64_PC32
+      - Offset:          0x40207C
+        Symbol:          .text
+        Type:            R_X86_64_PC32
+        Addend:          48
+      - Offset:          0x4020B8
+        Symbol:          .text
+        Type:            R_X86_64_PC32
+        Addend:          240
+      - Offset:          0x4020D8
+        Symbol:          .text
+        Type:            R_X86_64_PC32
+        Addend:          256
+  - Name:            .rela.init_array
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .init_array
+    Relocations:
+      - Offset:          0x403DE8
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          224
+  - Name:            .rela.fini_array
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .fini_array
+    Relocations:
+      - Offset:          0x403DF0
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          176
+  - Name:            .rela.gnu.build.attributes
+    Type:            SHT_RELA
+    Flags:           [ SHF_INFO_LINK ]
+    Link:            .symtab
+    AddressAlign:    0x8
+    Info:            .gnu.build.attributes
+    Relocations:
+      - Offset:          0x40602C
+        Symbol:          .text
+        Type:            R_X86_64_64
+      - Offset:          0x406034
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          38
+      - Offset:          0x406050
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          53
+      - Offset:          0x406058
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          53
+      - Offset:          0x406074
+        Symbol:          .init
+        Type:            R_X86_64_64
+      - Offset:          0x40607C
+        Symbol:          .init
+        Type:            R_X86_64_64
+        Addend:          22
+      - Offset:          0x406098
+        Symbol:          .fini
+        Type:            R_X86_64_64
+      - Offset:          0x4060A0
+        Symbol:          .fini
+        Type:            R_X86_64_64
+        Addend:          8
+      - Offset:          0x4060BC
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          64
+      - Offset:          0x4060C4
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          230
+      - Offset:          0x4060E0
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          358
+      - Offset:          0x4060E8
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          358
+      - Offset:          0x406104
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          358
+      - Offset:          0x40610C
+        Symbol:          .text
+        Type:            R_X86_64_64
+        Addend:          358
+      - Offset:          0x406128
+        Symbol:          .init
+        Type:            R_X86_64_64
+        Addend:          22
+      - Offset:          0x406130
+        Symbol:          .init
+        Type:            R_X86_64_64
+        Addend:          27
+      - Offset:          0x40614C
+        Symbol:          .fini
+        Type:            R_X86_64_64
+        Addend:          8
+      - Offset:          0x406154
+        Symbol:          .fini
+        Type:            R_X86_64_64
+        Addend:          13
+  - Type:            SectionHeaderTable
+    Sections:
+      - Name:            .interp
+      - Name:            .note.gnu.property
+      - Name:            .note.gnu.build-id
+      - Name:            .note.ABI-tag
+      - Name:            .gnu.hash
+      - Name:            .dynsym
+      - Name:            .dynstr
+      - Name:            .gnu.version
+      - Name:            .gnu.version_r
+      - Name:            .rela.dyn
+      - Name:            .rela.plt
+      - Name:            .init
+      - Name:            .rela.init
+      - Name:            .plt
+      - Name:            .text
+      - Name:            .rela.text
+      - Name:            .fini
+      - Name:            .rodata
+      - Name:            .eh_frame_hdr
+      - Name:            .eh_frame
+      - Name:            .rela.eh_frame
+      - Name:            .init_array
+      - Name:            .rela.init_array
+      - Name:            .fini_array
+      - Name:            .rela.fini_array
+      - Name:            .dynamic
+      - Name:            .got
+      - Name:            .got.plt
+      - Name:            .data
+      - Name:            .tm_clone_table
+      - Name:            .bss
+      - Name:            .comment
+      - Name:            .gnu.build.attributes
+      - Name:            .rela.gnu.build.attributes
+      - Name:            .symtab
+      - Name:            .strtab
+      - Name:            .shstrtab
+Symbols:
+  - Name:            .interp
+    Type:            STT_SECTION
+    Section:         .interp
+    Value:           0x400318
+  - Name:            .note.gnu.property
+    Type:            STT_SECTION
+    Section:         .note.gnu.property
+    Value:           0x400338
+  - Name:            .note.gnu.build-id
+    Type:            STT_SECTION
+    Section:         .note.gnu.build-id
+    Value:           0x400358
+  - Name:            .note.ABI-tag
+    Type:            STT_SECTION
+    Section:         .note.ABI-tag
+    Value:           0x40037C
+  - Name:            .gnu.hash
+    Type:            STT_SECTION
+    Section:         .gnu.hash
+    Value:           0x4003A0
+  - Name:            .dynsym
+    Type:            STT_SECTION
+    Section:         .dynsym
+    Value:           0x4003C0
+  - Name:            .dynstr
+    Type:            STT_SECTION
+    Section:         .dynstr
+    Value:           0x400450
+  - Name:            .gnu.version
+    Type:            STT_SECTION
+    Section:         .gnu.version
+    Value:           0x4004CE
+  - Name:            .gnu.version_r
+    Type:            STT_SECTION
+    Section:         .gnu.version_r
+    Value:           0x4004E0
+  - Name:            .rela.dyn
+    Type:            STT_SECTION
+    Section:         .rela.dyn
+    Value:           0x400510
+  - Name:            .rela.plt
+    Type:            STT_SECTION
+    Section:         .rela.plt
+    Value:           0x400570
+  - Name:            .init
+    Type:            STT_SECTION
+    Section:         .init
+    Value:           0x401000
+  - Name:            .plt
+    Type:            STT_SECTION
+    Section:         .plt
+    Value:           0x401020
+  - Name:            .text
+    Type:            STT_SECTION
+    Section:         .text
+    Value:           0x401040
+  - Name:            .fini
+    Type:            STT_SECTION
+    Section:         .fini
+    Value:           0x4011A8
+  - Name:            .rodata
+    Type:            STT_SECTION
+    Section:         .rodata
+    Value:           0x402000
+  - Name:            .eh_frame_hdr
+    Type:            STT_SECTION
+    Section:         .eh_frame_hdr
+    Value:           0x402010
+  - Name:            .eh_frame
+    Type:            STT_SECTION
+    Section:         .eh_frame
+    Value:           0x402048
+  - Name:            .init_array
+    Type:            STT_SECTION
+    Section:         .init_array
+    Value:           0x403DE8
+  - Name:            .fini_array
+    Type:            STT_SECTION
+    Section:         .fini_array
+    Value:           0x403DF0
+  - Name:            .dynamic
+    Type:            STT_SECTION
+    Section:         .dynamic
+    Value:           0x403DF8
+  - Name:            .got
+    Type:            STT_SECTION
+    Section:         .got
+    Value:           0x403FC8
+  - Name:            .got.plt
+    Type:            STT_SECTION
+    Section:         .got.plt
+    Value:           0x403FE8
+  - Name:            .data
+    Type:            STT_SECTION
+    Section:         .data
+    Value:           0x404008
+  - Name:            .tm_clone_table
+    Type:            STT_SECTION
+    Section:         .tm_clone_table
+    Value:           0x404010
+  - Name:            .bss
+    Type:            STT_SECTION
+    Section:         .bss
+    Value:           0x404010
+  - Name:            .comment
+    Type:            STT_SECTION
+    Section:         .comment
+  - Name:            .gnu.build.attributes
+    Type:            STT_SECTION
+    Section:         .gnu.build.attributes
+    Value:           0x406018
+  - Name:            crt1.o
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            __abi_tag
+    Type:            STT_OBJECT
+    Section:         .note.ABI-tag
+    Value:           0x40037C
+    Size:            0x20
+  - Name:            crtstuff.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            __TMC_LIST__
+    Type:            STT_OBJECT
+    Section:         .tm_clone_table
+    Value:           0x404010
+  - Name:            deregister_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x401080
+  - Name:            register_tm_clones
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x4010B0
+  - Name:            __do_global_dtors_aux
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x4010F0
+  - Name:            completed.0
+    Type:            STT_OBJECT
+    Section:         .bss
+    Value:           0x404010
+    Size:            0x1
+  - Name:            __do_global_dtors_aux_fini_array_entry
+    Type:            STT_OBJECT
+    Section:         .fini_array
+    Value:           0x403DF0
+  - Name:            frame_dummy
+    Type:            STT_FUNC
+    Section:         .text
+    Value:           0x401120
+  - Name:            __frame_dummy_init_array_entry
+    Type:            STT_OBJECT
+    Section:         .init_array
+    Value:           0x403DE8
+  - Name:            callcont-fallthru.c
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            'crtstuff.c (1)'
+    Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            __FRAME_END__
+    Type:            STT_OBJECT
+    Section:         .eh_frame
+    Value:           0x4020F0
+  - Type:            STT_FILE
+    Index:           SHN_ABS
+  - Name:            _DYNAMIC
+    Type:            STT_OBJECT
+    Section:         .dynamic
+    Value:           0x403DF8
+  - Name:            __GNU_EH_FRAME_HDR
+    Section:         .eh_frame_hdr
+    Value:           0x402010
+  - Name:            _GLOBAL_OFFSET_TABLE_
+    Type:            STT_OBJECT
+    Section:         .got.plt
+    Value:           0x403FE8
+  - Name:            '__libc_start_main at GLIBC_2.34'
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            data_start
+    Section:         .data
+    Binding:         STB_WEAK
+    Value:           0x404008
+  - Name:            _edata
+    Section:         .tm_clone_table
+    Binding:         STB_GLOBAL
+    Value:           0x404010
+  - Name:            _fini
+    Type:            STT_FUNC
+    Section:         .fini
+    Binding:         STB_GLOBAL
+    Value:           0x4011A8
+    Other:           [ STV_HIDDEN ]
+  - Name:            __data_start
+    Section:         .data
+    Binding:         STB_GLOBAL
+    Value:           0x404008
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            __dso_handle
+    Type:            STT_OBJECT
+    Section:         .rodata
+    Binding:         STB_GLOBAL
+    Value:           0x402008
+    Other:           [ STV_HIDDEN ]
+  - Name:            _IO_stdin_used
+    Type:            STT_OBJECT
+    Section:         .rodata
+    Binding:         STB_GLOBAL
+    Value:           0x402000
+    Size:            0x4
+  - Name:            foo
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x401130
+    Size:            0x6
+  - Name:            _end
+    Section:         .bss
+    Binding:         STB_GLOBAL
+    Value:           0x404018
+  - Name:            _dl_relocate_static_pie
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x401070
+    Size:            0x5
+    Other:           [ STV_HIDDEN ]
+  - Name:            _start
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x401040
+    Size:            0x26
+  - Name:            __bss_start
+    Section:         .bss
+    Binding:         STB_GLOBAL
+    Value:           0x404010
+  - Name:            main
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+    Value:           0x401140
+    Size:            0x66
+  - Name:            'atoi at GLIBC_2.2.5'
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            __TMC_END__
+    Type:            STT_OBJECT
+    Section:         .tm_clone_table
+    Binding:         STB_GLOBAL
+    Value:           0x404010
+    Other:           [ STV_HIDDEN ]
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            _init
+    Type:            STT_FUNC
+    Section:         .init
+    Binding:         STB_GLOBAL
+    Value:           0x401000
+    Other:           [ STV_HIDDEN ]
+DynamicSymbols:
+  - Name:            __libc_start_main
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            _ITM_deregisterTMCloneTable
+    Binding:         STB_WEAK
+  - Name:            __gmon_start__
+    Binding:         STB_WEAK
+  - Name:            atoi
+    Type:            STT_FUNC
+    Binding:         STB_GLOBAL
+  - Name:            _ITM_registerTMCloneTable
+    Binding:         STB_WEAK
+...
diff --git a/bolt/test/X86/callcont-fallthru.test b/bolt/test/X86/callcont-fallthru.test
new file mode 100644
index 00000000000000..8e43589e8f5425
--- /dev/null
+++ b/bolt/test/X86/callcont-fallthru.test
@@ -0,0 +1,9 @@
+## Reproduces missing call continuation fallthrough count when using
+## pre-aggregated perf data
+
+# RUN: yaml2obj %p/Inputs/callcont-fallthru.yaml > %t.exe
+# RUN: llvm-bolt %t.exe --pa -p %p/Inputs/callcont-fallthru.preagg -o %t.out \
+# RUN:   --print-cfg --print-only=main | FileCheck %s
+
+# CHECK:      callq foo
+# CHECK-NEXT: count: 0

>From 5f48b9253844f970245a7e46c85bb5343b0efc4c Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sat, 21 Sep 2024 10:04:42 -0700
Subject: [PATCH 2/8] repurpose for the fix of call cont discontinuity

Created using spr 1.3.4
---
 bolt/include/bolt/Profile/DataAggregator.h |   9 +-
 bolt/lib/Profile/DataAggregator.cpp        | 105 ++++++++-------------
 bolt/test/X86/callcont-fallthru.test       |   2 +-
 3 files changed, 46 insertions(+), 70 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6453b3070ceb8d..1e7695baab62cc 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -202,8 +202,8 @@ class DataAggregator : public DataReader {
   /// Return a vector of offsets corresponding to a trace in a function
   /// if the trace is valid, std::nullopt otherwise.
   std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
-  getFallthroughsInTrace(BinaryFunction &BF, const LBREntry &First,
-                         const LBREntry &Second, uint64_t Count = 1) const;
+  getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, uint64_t To,
+                         uint64_t Count = 1) const;
 
   /// Record external entry into the function \p BF.
   ///
@@ -268,9 +268,8 @@ class DataAggregator : public DataReader {
   /// Register a \p Branch.
   bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds);
 
-  /// Register a trace between two LBR entries supplied in execution order.
-  bool doTrace(const LBREntry &First, const LBREntry &Second,
-               uint64_t Count = 1);
+  /// Register a trace between two addresses.
+  bool doTrace(const uint64_t From, const uint64_t To, uint64_t Count = 1);
 
   /// Parser helpers
   /// Return false if we exhausted our parser buffer and finished parsing
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index fcde6f5f4642c8..f73c966ec053a4 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -804,9 +804,10 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
   };
 
   BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
-  // Ignore returns.
+  // Record returns as call->call continuation fall-through.
   if (IsReturn)
-    return true;
+    return doTrace(To - 1, To, Count);
+
   BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
   if (!FromFunc && !ToFunc)
     return false;
@@ -820,16 +821,24 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
 }
 
-bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
+bool DataAggregator::doTrace(const uint64_t From, const uint64_t To,
                              uint64_t Count) {
-  BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
-  BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
+  BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
+  BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
   if (!FromFunc || !ToFunc) {
     LLVM_DEBUG({
-      dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
-             << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
-             << " and ending in " << ToFunc->getPrintName()
-             << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
+      dbgs() << "Out of range trace starting in ";
+      if (FromFunc)
+        dbgs() << formatv("{0} @ {1:x}", *FromFunc,
+                          From - FromFunc->getAddress());
+      else
+        dbgs() << Twine::utohexstr(From);
+      dbgs() << " and ending in ";
+      if (ToFunc)
+        dbgs() << formatv("{0} @ {1:x}", *ToFunc, To - ToFunc->getAddress());
+      else
+        dbgs() << Twine::utohexstr(To);
+      dbgs() << '\n';
     });
     NumLongRangeTraces += Count;
     return false;
@@ -838,32 +847,30 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
     NumInvalidTraces += Count;
     LLVM_DEBUG({
       dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
-             << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
+             << formatv(" @ {0:x}", From - FromFunc->getAddress())
              << " and ending in " << ToFunc->getPrintName()
-             << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
+             << formatv(" @ {0:x}\n", To - ToFunc->getAddress());
     });
     return false;
   }
 
   std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
-      BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
-                                        Second.From)
-          : getFallthroughsInTrace(*FromFunc, First, Second, Count);
+      BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), From, To)
+          : getFallthroughsInTrace(*FromFunc, From, To, Count);
   if (!FTs) {
     LLVM_DEBUG(
         dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
-               << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
+               << " @ " << Twine::utohexstr(From - FromFunc->getAddress())
                << " and ending in " << ToFunc->getPrintName() << " @ "
                << ToFunc->getPrintName() << " @ "
-               << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
+               << Twine::utohexstr(To - ToFunc->getAddress()) << '\n');
     NumInvalidTraces += Count;
     return false;
   }
 
   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
-                    << FromFunc->getPrintName() << ":"
-                    << Twine::utohexstr(First.To) << " to "
-                    << Twine::utohexstr(Second.From) << ".\n");
+                    << FromFunc->getPrintName() << ":" << Twine::utohexstr(From)
+                    << " to " << Twine::utohexstr(To) << ".\n");
   BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
   for (auto [From, To] : *FTs) {
     if (BAT) {
@@ -877,10 +884,8 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
 }
 
 std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
-DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
-                                       const LBREntry &FirstLBR,
-                                       const LBREntry &SecondLBR,
-                                       uint64_t Count) const {
+DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From,
+                                       uint64_t To, uint64_t Count) const {
   SmallVector<std::pair<uint64_t, uint64_t>, 16> Branches;
 
   BinaryContext &BC = BF.getBinaryContext();
@@ -891,8 +896,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
   assert(BF.hasCFG() && "can only record traces in CFG state");
 
   // Offsets of the trace within this function.
-  const uint64_t From = FirstLBR.To - BF.getAddress();
-  const uint64_t To = SecondLBR.From - BF.getAddress();
+  From = From - BF.getAddress();
+  To = To - BF.getAddress();
 
   if (From > To)
     return std::nullopt;
@@ -903,24 +908,6 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
   if (!FromBB || !ToBB)
     return std::nullopt;
 
-  // Adjust FromBB if the first LBR is a return from the last instruction in
-  // the previous block (that instruction should be a call).
-  if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
-      !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
-    const BinaryBasicBlock *PrevBB =
-        BF.getLayout().getBlock(FromBB->getIndex() - 1);
-    if (PrevBB->getSuccessor(FromBB->getLabel())) {
-      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
-      if (Instr && BC.MIB->isCall(*Instr))
-        FromBB = PrevBB;
-      else
-        LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
-                          << '\n');
-    } else {
-      LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
-    }
-  }
-
   // Fill out information for fall-through edges. The From and To could be
   // within the same basic block, e.g. when two call instructions are in the
   // same block. In this case we skip the processing.
@@ -937,8 +924,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
     // Check for bad LBRs.
     if (!BB->getSuccessor(NextBB->getLabel())) {
       LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
-                        << "  " << FirstLBR << '\n'
-                        << "  " << SecondLBR << '\n');
+                        << "  " << From << '\n'
+                        << "  " << To << '\n');
       return std::nullopt;
     }
 
@@ -1595,16 +1582,11 @@ void DataAggregator::processBranchEvents() {
   NamedRegionTimer T("processBranch", "Processing branch events",
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
-  for (const auto &AggrLBR : FallthroughLBRs) {
-    const Trace &Loc = AggrLBR.first;
-    const FTInfo &Info = AggrLBR.second;
-    LBREntry First{Loc.From, Loc.From, false};
-    LBREntry Second{Loc.To, Loc.To, false};
+  for (const auto &[Loc, Info]: FallthroughLBRs) {
     if (Info.InternCount)
-      doTrace(First, Second, Info.InternCount);
+      doTrace(Loc.From, Loc.To, Info.InternCount);
     if (Info.ExternCount) {
-      First.From = 0;
-      doTrace(First, Second, Info.ExternCount);
+      doTrace(0, Loc.To, Info.ExternCount);
     }
   }
 
@@ -1768,21 +1750,16 @@ void DataAggregator::processPreAggregated() {
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
   uint64_t NumTraces = 0;
-  for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
-    switch (AggrEntry.EntryType) {
+  for (const auto &[From, To, Count, Mispreds, Type]: AggregatedLBRs) {
+    bool IsExternalOrigin = Type == AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
+    switch (Type) {
     case AggregatedLBREntry::BRANCH:
-      doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
-               AggrEntry.Mispreds);
+      doBranch(From.Offset, To.Offset, Count, Mispreds);
       break;
     case AggregatedLBREntry::FT:
     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
-      LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
-                         ? AggrEntry.From.Offset
-                         : 0,
-                     AggrEntry.From.Offset, false};
-      LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
-      doTrace(First, Second, AggrEntry.Count);
-      NumTraces += AggrEntry.Count;
+      doTrace(IsExternalOrigin ? 0 : From.Offset, To.Offset, Count);
+      NumTraces += Count;
       break;
     }
     }
diff --git a/bolt/test/X86/callcont-fallthru.test b/bolt/test/X86/callcont-fallthru.test
index 8e43589e8f5425..e0a5c5a6852d42 100644
--- a/bolt/test/X86/callcont-fallthru.test
+++ b/bolt/test/X86/callcont-fallthru.test
@@ -6,4 +6,4 @@
 # RUN:   --print-cfg --print-only=main | FileCheck %s
 
 # CHECK:      callq foo
-# CHECK-NEXT: count: 0
+# CHECK-NEXT: count: 103204

>From 97412974e7e470c00b232aed69f139d28ce97e52 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sat, 21 Sep 2024 10:09:26 -0700
Subject: [PATCH 3/8] clang-format

Created using spr 1.3.4
---
 bolt/lib/Profile/DataAggregator.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index f73c966ec053a4..dbd0ed07c7c1d9 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -858,12 +858,12 @@ bool DataAggregator::doTrace(const uint64_t From, const uint64_t To,
       BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), From, To)
           : getFallthroughsInTrace(*FromFunc, From, To, Count);
   if (!FTs) {
-    LLVM_DEBUG(
-        dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
-               << " @ " << Twine::utohexstr(From - FromFunc->getAddress())
-               << " and ending in " << ToFunc->getPrintName() << " @ "
-               << ToFunc->getPrintName() << " @ "
-               << Twine::utohexstr(To - ToFunc->getAddress()) << '\n');
+    LLVM_DEBUG(dbgs() << "Invalid trace starting in "
+                      << FromFunc->getPrintName() << " @ "
+                      << Twine::utohexstr(From - FromFunc->getAddress())
+                      << " and ending in " << ToFunc->getPrintName() << " @ "
+                      << ToFunc->getPrintName() << " @ "
+                      << Twine::utohexstr(To - ToFunc->getAddress()) << '\n');
     NumInvalidTraces += Count;
     return false;
   }
@@ -1582,7 +1582,7 @@ void DataAggregator::processBranchEvents() {
   NamedRegionTimer T("processBranch", "Processing branch events",
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
-  for (const auto &[Loc, Info]: FallthroughLBRs) {
+  for (const auto &[Loc, Info] : FallthroughLBRs) {
     if (Info.InternCount)
       doTrace(Loc.From, Loc.To, Info.InternCount);
     if (Info.ExternCount) {
@@ -1750,7 +1750,7 @@ void DataAggregator::processPreAggregated() {
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
   uint64_t NumTraces = 0;
-  for (const auto &[From, To, Count, Mispreds, Type]: AggregatedLBRs) {
+  for (const auto &[From, To, Count, Mispreds, Type] : AggregatedLBRs) {
     bool IsExternalOrigin = Type == AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
     switch (Type) {
     case AggregatedLBREntry::BRANCH:

>From 9c4effa15a5bfa7a8c03aad25421a462bb56ffaf Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sat, 21 Sep 2024 19:08:41 -0700
Subject: [PATCH 4/8] Drop changes in doTrace/getFallthroughsInTrace

Created using spr 1.3.4
---
 bolt/include/bolt/Profile/DataAggregator.h |   9 +-
 bolt/lib/Profile/DataAggregator.cpp        | 108 ++++++++++++++-------
 2 files changed, 77 insertions(+), 40 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 1e7695baab62cc..6453b3070ceb8d 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -202,8 +202,8 @@ class DataAggregator : public DataReader {
   /// Return a vector of offsets corresponding to a trace in a function
   /// if the trace is valid, std::nullopt otherwise.
   std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
-  getFallthroughsInTrace(BinaryFunction &BF, uint64_t From, uint64_t To,
-                         uint64_t Count = 1) const;
+  getFallthroughsInTrace(BinaryFunction &BF, const LBREntry &First,
+                         const LBREntry &Second, uint64_t Count = 1) const;
 
   /// Record external entry into the function \p BF.
   ///
@@ -268,8 +268,9 @@ class DataAggregator : public DataReader {
   /// Register a \p Branch.
   bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds);
 
-  /// Register a trace between two addresses.
-  bool doTrace(const uint64_t From, const uint64_t To, uint64_t Count = 1);
+  /// Register a trace between two LBR entries supplied in execution order.
+  bool doTrace(const LBREntry &First, const LBREntry &Second,
+               uint64_t Count = 1);
 
   /// Parser helpers
   /// Return false if we exhausted our parser buffer and finished parsing
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index dbd0ed07c7c1d9..72905d0ecf6a05 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -805,8 +805,11 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
 
   BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
   // Record returns as call->call continuation fall-through.
-  if (IsReturn)
-    return doTrace(To - 1, To, Count);
+  if (IsReturn) {
+    LBREntry First{To - 1, To - 1, false};
+    LBREntry Second{To, To, false};
+    return doTrace(First, Second, Count);
+  }
 
   BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
   if (!FromFunc && !ToFunc)
@@ -821,23 +824,24 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
   return doInterBranch(FromFunc, ToFunc, From, To, Count, Mispreds);
 }
 
-bool DataAggregator::doTrace(const uint64_t From, const uint64_t To,
+bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
                              uint64_t Count) {
-  BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
-  BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
+  BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(First.To);
+  BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
   if (!FromFunc || !ToFunc) {
     LLVM_DEBUG({
       dbgs() << "Out of range trace starting in ";
       if (FromFunc)
         dbgs() << formatv("{0} @ {1:x}", *FromFunc,
-                          From - FromFunc->getAddress());
+                          First.To - FromFunc->getAddress());
       else
-        dbgs() << Twine::utohexstr(From);
+        dbgs() << Twine::utohexstr(First.To);
       dbgs() << " and ending in ";
       if (ToFunc)
-        dbgs() << formatv("{0} @ {1:x}", *ToFunc, To - ToFunc->getAddress());
+        dbgs() << formatv("{0} @ {1:x}", *ToFunc,
+                          Second.From - ToFunc->getAddress());
       else
-        dbgs() << Twine::utohexstr(To);
+        dbgs() << Twine::utohexstr(Second.From);
       dbgs() << '\n';
     });
     NumLongRangeTraces += Count;
@@ -847,30 +851,32 @@ bool DataAggregator::doTrace(const uint64_t From, const uint64_t To,
     NumInvalidTraces += Count;
     LLVM_DEBUG({
       dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
-             << formatv(" @ {0:x}", From - FromFunc->getAddress())
+             << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
              << " and ending in " << ToFunc->getPrintName()
-             << formatv(" @ {0:x}\n", To - ToFunc->getAddress());
+             << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
     });
     return false;
   }
 
   std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
-      BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), From, To)
-          : getFallthroughsInTrace(*FromFunc, From, To, Count);
+      BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
+                                        Second.From)
+          : getFallthroughsInTrace(*FromFunc, First, Second, Count);
   if (!FTs) {
-    LLVM_DEBUG(dbgs() << "Invalid trace starting in "
-                      << FromFunc->getPrintName() << " @ "
-                      << Twine::utohexstr(From - FromFunc->getAddress())
-                      << " and ending in " << ToFunc->getPrintName() << " @ "
-                      << ToFunc->getPrintName() << " @ "
-                      << Twine::utohexstr(To - ToFunc->getAddress()) << '\n');
+    LLVM_DEBUG(
+        dbgs() << "Invalid trace starting in " << FromFunc->getPrintName()
+               << " @ " << Twine::utohexstr(First.To - FromFunc->getAddress())
+               << " and ending in " << ToFunc->getPrintName() << " @ "
+               << ToFunc->getPrintName() << " @ "
+               << Twine::utohexstr(Second.From - ToFunc->getAddress()) << '\n');
     NumInvalidTraces += Count;
     return false;
   }
 
   LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for "
-                    << FromFunc->getPrintName() << ":" << Twine::utohexstr(From)
-                    << " to " << Twine::utohexstr(To) << ".\n");
+                    << FromFunc->getPrintName() << ":"
+                    << Twine::utohexstr(First.To) << " to "
+                    << Twine::utohexstr(Second.From) << ".\n");
   BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
   for (auto [From, To] : *FTs) {
     if (BAT) {
@@ -884,8 +890,10 @@ bool DataAggregator::doTrace(const uint64_t From, const uint64_t To,
 }
 
 std::optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
-DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From,
-                                       uint64_t To, uint64_t Count) const {
+DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
+                                       const LBREntry &FirstLBR,
+                                       const LBREntry &SecondLBR,
+                                       uint64_t Count) const {
   SmallVector<std::pair<uint64_t, uint64_t>, 16> Branches;
 
   BinaryContext &BC = BF.getBinaryContext();
@@ -896,8 +904,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From,
   assert(BF.hasCFG() && "can only record traces in CFG state");
 
   // Offsets of the trace within this function.
-  From = From - BF.getAddress();
-  To = To - BF.getAddress();
+  const uint64_t From = FirstLBR.To - BF.getAddress();
+  const uint64_t To = SecondLBR.From - BF.getAddress();
 
   if (From > To)
     return std::nullopt;
@@ -908,6 +916,24 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From,
   if (!FromBB || !ToBB)
     return std::nullopt;
 
+  // Adjust FromBB if the first LBR is a return from the last instruction in
+  // the previous block (that instruction should be a call).
+  if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
+      !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
+    const BinaryBasicBlock *PrevBB =
+        BF.getLayout().getBlock(FromBB->getIndex() - 1);
+    if (PrevBB->getSuccessor(FromBB->getLabel())) {
+      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
+      if (Instr && BC.MIB->isCall(*Instr))
+        FromBB = PrevBB;
+      else
+        LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
+                          << '\n');
+    } else {
+      LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
+    }
+  }
+
   // Fill out information for fall-through edges. The From and To could be
   // within the same basic block, e.g. when two call instructions are in the
   // same block. In this case we skip the processing.
@@ -924,8 +950,8 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, uint64_t From,
     // Check for bad LBRs.
     if (!BB->getSuccessor(NextBB->getLabel())) {
       LLVM_DEBUG(dbgs() << "no fall-through for the trace:\n"
-                        << "  " << From << '\n'
-                        << "  " << To << '\n');
+                        << "  " << FirstLBR << '\n'
+                        << "  " << SecondLBR << '\n');
       return std::nullopt;
     }
 
@@ -1582,11 +1608,16 @@ void DataAggregator::processBranchEvents() {
   NamedRegionTimer T("processBranch", "Processing branch events",
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
-  for (const auto &[Loc, Info] : FallthroughLBRs) {
+  for (const auto &AggrLBR : FallthroughLBRs) {
+    const Trace &Loc = AggrLBR.first;
+    const FTInfo &Info = AggrLBR.second;
+    LBREntry First{Loc.From, Loc.From, false};
+    LBREntry Second{Loc.To, Loc.To, false};
     if (Info.InternCount)
-      doTrace(Loc.From, Loc.To, Info.InternCount);
+      doTrace(First, Second, Info.InternCount);
     if (Info.ExternCount) {
-      doTrace(0, Loc.To, Info.ExternCount);
+      First.From = 0;
+      doTrace(First, Second, Info.ExternCount);
     }
   }
 
@@ -1750,16 +1781,21 @@ void DataAggregator::processPreAggregated() {
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
   uint64_t NumTraces = 0;
-  for (const auto &[From, To, Count, Mispreds, Type] : AggregatedLBRs) {
-    bool IsExternalOrigin = Type == AggregatedLBREntry::FT_EXTERNAL_ORIGIN;
-    switch (Type) {
+  for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
+    switch (AggrEntry.EntryType) {
     case AggregatedLBREntry::BRANCH:
-      doBranch(From.Offset, To.Offset, Count, Mispreds);
+      doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
+               AggrEntry.Mispreds);
       break;
     case AggregatedLBREntry::FT:
     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
-      doTrace(IsExternalOrigin ? 0 : From.Offset, To.Offset, Count);
-      NumTraces += Count;
+      LBREntry First{AggrEntry.EntryType == AggregatedLBREntry::FT
+                         ? AggrEntry.From.Offset
+                         : 0,
+                     AggrEntry.From.Offset, false};
+      LBREntry Second{AggrEntry.To.Offset, AggrEntry.To.Offset, false};
+      doTrace(First, Second, AggrEntry.Count);
+      NumTraces += AggrEntry.Count;
       break;
     }
     }

>From 9e4dd66c4669fd1a374d35b7522fde864e9b2efb Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Tue, 1 Oct 2024 17:19:07 -0700
Subject: [PATCH 5/8] Handle external origin LBR (non-BAT mode)

Created using spr 1.3.4
---
 bolt/lib/Profile/DataAggregator.cpp           | 122 ++-
 bolt/test/X86/Inputs/callcont-fallthru.preagg |  21 -
 bolt/test/X86/Inputs/callcont-fallthru.yaml   | 889 ------------------
 bolt/test/X86/callcont-fallthru.s             |  60 ++
 bolt/test/X86/callcont-fallthru.test          |   9 -
 5 files changed, 132 insertions(+), 969 deletions(-)
 delete mode 100644 bolt/test/X86/Inputs/callcont-fallthru.preagg
 delete mode 100644 bolt/test/X86/Inputs/callcont-fallthru.yaml
 create mode 100644 bolt/test/X86/callcont-fallthru.s
 delete mode 100644 bolt/test/X86/callcont-fallthru.test

diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 72905d0ecf6a05..fe371ef3ca1050 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -775,46 +775,86 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
 
 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
                               uint64_t Mispreds) {
-  bool IsReturn = false;
-  auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
-    if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
-      Addr -= Func->getAddress();
-      if (IsFrom) {
-        auto checkReturn = [&](auto MaybeInst) {
-          IsReturn = MaybeInst && BC->MIB->isReturn(*MaybeInst);
-        };
-        if (Func->hasInstructions())
-          checkReturn(Func->getInstructionAtOffset(Addr));
-        else
-          checkReturn(Func->disassembleInstructionAtOffset(Addr));
-      }
+  // Returns whether \p Offset in \p Func contains a return instruction.
+  auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
+    auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
+    return Func.hasInstructions()
+               ? isReturn(Func.getInstructionAtOffset(Offset))
+               : isReturn(Func.disassembleInstructionAtOffset(Offset));
+  };
 
-      if (BAT)
-        Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
+  // Returns whether \p Offset in \p Func corresponds to a call continuation
+  // fallthrough block.
+  auto checkCallCont = [&](BinaryFunction &Func, const uint64_t Offset) {
+    // Note the use of MCInstrAnalysis: no call continuation for a tail call.
+    auto isCall = [&](auto MI) { return MI && BC->MIA->isCall(*MI); };
+
+    // No call continuation at a function start.
+    if (!Offset)
+      return false;
+
+    // FIXME: support BAT case where the function might be in empty state
+    // (split fragments declared non-simple).
+    if (!Func.hasCFG())
+      return false;
+
+    // The offset should not be an entry point or a landing pad.
+    const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset);
+    if (!ContBB || ContBB->isEntryPoint() || ContBB->isLandingPad())
+      return false;
+
+    // Check that preceding instruction is a call.
+    const BinaryBasicBlock *CallBB =
+        Func.getBasicBlockContainingOffset(Offset - 1);
+    if (!CallBB || CallBB == ContBB)
+      return false;
+    return isCall(CallBB->getLastNonPseudoInstr());
+  };
 
-      if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) {
-        Func = ParentFunc;
-        if (IsFrom)
-          NumColdSamples += Count;
-      }
+  // Mutates \p Addr to an offset into the containing function, performing BAT
+  // offset translation and parent lookup.
+  //
+  // Returns the containing function (or BAT parent) and whether the address
+  // corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
+  auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
+    BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
+    if (!Func)
+      return std::pair{Func, false};
 
-      return Func;
-    }
-    return nullptr;
-  };
+    Addr -= Func->getAddress();
 
-  BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
-  // Record returns as call->call continuation fall-through.
-  if (IsReturn) {
-    LBREntry First{To - 1, To - 1, false};
-    LBREntry Second{To, To, false};
-    return doTrace(First, Second, Count);
-  }
+    bool IsRetOrCallCont =
+        IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);
+
+    if (BAT)
+      Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
+
+    BinaryFunction *ParentFunc = getBATParentFunction(*Func);
+    if (!ParentFunc)
+      return std::pair{Func, IsRetOrCallCont};
 
-  BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
+    if (IsFrom)
+      NumColdSamples += Count;
+
+    return std::pair{ParentFunc, IsRetOrCallCont};
+  };
+
+  uint64_t ToOrig = To;
+  auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom=*/true);
+  auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom=*/false);
   if (!FromFunc && !ToFunc)
     return false;
 
+  // Record call to continuation trace.
+  if (IsCallCont && FromFunc != ToFunc) {
+    LBREntry First{ToOrig - 1, ToOrig - 1, false};
+    LBREntry Second{ToOrig, ToOrig, false};
+    return doTrace(First, Second, Count);
+  }
+  // Ignore returns.
+  if (IsReturn)
+    return true;
+
   // Treat recursive control transfers as inter-branches.
   if (FromFunc == ToFunc && To != 0) {
     recordBranch(*FromFunc, From, To, Count, Mispreds);
@@ -916,24 +956,6 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
   if (!FromBB || !ToBB)
     return std::nullopt;
 
-  // Adjust FromBB if the first LBR is a return from the last instruction in
-  // the previous block (that instruction should be a call).
-  if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
-      !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
-    const BinaryBasicBlock *PrevBB =
-        BF.getLayout().getBlock(FromBB->getIndex() - 1);
-    if (PrevBB->getSuccessor(FromBB->getLabel())) {
-      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
-      if (Instr && BC.MIB->isCall(*Instr))
-        FromBB = PrevBB;
-      else
-        LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
-                          << '\n');
-    } else {
-      LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
-    }
-  }
-
   // Fill out information for fall-through edges. The From and To could be
   // within the same basic block, e.g. when two call instructions are in the
   // same block. In this case we skip the processing.
diff --git a/bolt/test/X86/Inputs/callcont-fallthru.preagg b/bolt/test/X86/Inputs/callcont-fallthru.preagg
deleted file mode 100644
index 0b5f344540573a..00000000000000
--- a/bolt/test/X86/Inputs/callcont-fallthru.preagg
+++ /dev/null
@@ -1,21 +0,0 @@
-B ffffffff81e01006 401194 8 0
-B 401180 401199 98482 96
-B 401199 401166 99542 0
-B 401177 401130 102776 0
-B 401135 40117c 103204 0
-B 401186 40118b 1022983 0
-B 401194 40117c 1021645 1
-F 40117c 401135 1161
-F 40117c 401180 92267
-F 40118b 401194 991002
-F 40117c 401186 968072
-F 40118b 401186 11468
-F 401130 401135 100015
-F 401166 401177 96992
-F 401199 401199 96168
-F 40117c ffffffff81e01006 7
-F 401199 401180 1140
-F 401194 ffffffff81e01006 1
-F 40117c 401194 11522
-F 401166 401199 1151
-F 401130 401177 1154
diff --git a/bolt/test/X86/Inputs/callcont-fallthru.yaml b/bolt/test/X86/Inputs/callcont-fallthru.yaml
deleted file mode 100644
index a1f8417d1e217d..00000000000000
--- a/bolt/test/X86/Inputs/callcont-fallthru.yaml
+++ /dev/null
@@ -1,889 +0,0 @@
---- !ELF
-FileHeader:
-  Class:           ELFCLASS64
-  Data:            ELFDATA2LSB
-  Type:            ET_EXEC
-  Machine:         EM_X86_64
-  Entry:           0x401040
-ProgramHeaders:
-  - Type:            PT_PHDR
-    Flags:           [ PF_R ]
-    VAddr:           0x400040
-    Align:           0x8
-  - Type:            PT_INTERP
-    Flags:           [ PF_R ]
-    FirstSec:        .interp
-    LastSec:         .interp
-    VAddr:           0x400318
-  - Type:            PT_LOAD
-    Flags:           [ PF_R ]
-    FirstSec:        .interp
-    LastSec:         .rela.plt
-    VAddr:           0x400000
-    Align:           0x1000
-  - Type:            PT_LOAD
-    Flags:           [ PF_X, PF_R ]
-    FirstSec:        .init
-    LastSec:         .fini
-    VAddr:           0x401000
-    Align:           0x1000
-  - Type:            PT_LOAD
-    Flags:           [ PF_R ]
-    FirstSec:        .rodata
-    LastSec:         .eh_frame
-    VAddr:           0x402000
-    Align:           0x1000
-  - Type:            PT_LOAD
-    Flags:           [ PF_W, PF_R ]
-    FirstSec:        .init_array
-    LastSec:         .bss
-    VAddr:           0x403DE8
-    Align:           0x1000
-  - Type:            PT_DYNAMIC
-    Flags:           [ PF_W, PF_R ]
-    FirstSec:        .dynamic
-    LastSec:         .dynamic
-    VAddr:           0x403DF8
-    Align:           0x8
-  - Type:            PT_NOTE
-    Flags:           [ PF_R ]
-    FirstSec:        .note.gnu.property
-    LastSec:         .note.gnu.property
-    VAddr:           0x400338
-    Align:           0x8
-  - Type:            PT_NOTE
-    Flags:           [ PF_R ]
-    FirstSec:        .note.gnu.build-id
-    LastSec:         .note.ABI-tag
-    VAddr:           0x400358
-    Align:           0x4
-  - Type:            PT_GNU_PROPERTY
-    Flags:           [ PF_R ]
-    FirstSec:        .note.gnu.property
-    LastSec:         .note.gnu.property
-    VAddr:           0x400338
-    Align:           0x8
-  - Type:            PT_GNU_EH_FRAME
-    Flags:           [ PF_R ]
-    FirstSec:        .eh_frame_hdr
-    LastSec:         .eh_frame_hdr
-    VAddr:           0x402010
-    Align:           0x4
-  - Type:            PT_GNU_STACK
-    Flags:           [ PF_W, PF_R ]
-    Align:           0x10
-  - Type:            PT_GNU_RELRO
-    Flags:           [ PF_R ]
-    FirstSec:        .init_array
-    LastSec:         .got
-    VAddr:           0x403DE8
-Sections:
-  - Name:            .interp
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x400318
-    AddressAlign:    0x1
-    Content:         2F6C696236342F6C642D6C696E75782D7838362D36342E736F2E3200
-  - Name:            .note.gnu.property
-    Type:            SHT_NOTE
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x400338
-    AddressAlign:    0x8
-    Notes:
-      - Name:            GNU
-        Desc:            028000C0040000000300000000000000
-        Type:            NT_GNU_PROPERTY_TYPE_0
-  - Name:            .note.gnu.build-id
-    Type:            SHT_NOTE
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x400358
-    AddressAlign:    0x4
-    Notes:
-      - Name:            GNU
-        Desc:            A77EA471B9AAA21E180E5FD02A0A0B2E4AB643E9
-        Type:            NT_PRPSINFO
-  - Name:            .note.ABI-tag
-    Type:            SHT_NOTE
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x40037C
-    AddressAlign:    0x4
-    Notes:
-      - Name:            GNU
-        Desc:            '00000000030000000200000000000000'
-        Type:            NT_VERSION
-  - Name:            .gnu.hash
-    Type:            SHT_GNU_HASH
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x4003A0
-    Link:            .dynsym
-    AddressAlign:    0x8
-    Header:
-      SymNdx:          0x1
-      Shift2:          0x0
-    BloomFilter:     [ 0x0 ]
-    HashBuckets:     [ 0x0 ]
-    HashValues:      [  ]
-  - Name:            .dynsym
-    Type:            SHT_DYNSYM
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x4003C0
-    Link:            .dynstr
-    AddressAlign:    0x8
-  - Name:            .dynstr
-    Type:            SHT_STRTAB
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x400450
-    AddressAlign:    0x1
-  - Name:            .gnu.version
-    Type:            SHT_GNU_versym
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x4004CE
-    Link:            .dynsym
-    AddressAlign:    0x2
-    Entries:         [ 0, 2, 1, 1, 3, 1 ]
-  - Name:            .gnu.version_r
-    Type:            SHT_GNU_verneed
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x4004E0
-    Link:            .dynstr
-    AddressAlign:    0x8
-    Dependencies:
-      - Version:         1
-        File:            libc.so.6
-        Entries:
-          - Name:            GLIBC_2.2.5
-            Hash:            157882997
-            Flags:           0
-            Other:           3
-          - Name:            GLIBC_2.34
-            Hash:            110530996
-            Flags:           0
-            Other:           2
-  - Name:            .rela.dyn
-    Type:            SHT_RELA
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x400510
-    Link:            .dynsym
-    AddressAlign:    0x8
-    Relocations:
-      - Offset:          0x403FC8
-        Symbol:          __libc_start_main
-        Type:            R_X86_64_GLOB_DAT
-      - Offset:          0x403FD0
-        Symbol:          _ITM_deregisterTMCloneTable
-        Type:            R_X86_64_GLOB_DAT
-      - Offset:          0x403FD8
-        Symbol:          __gmon_start__
-        Type:            R_X86_64_GLOB_DAT
-      - Offset:          0x403FE0
-        Symbol:          _ITM_registerTMCloneTable
-        Type:            R_X86_64_GLOB_DAT
-  - Name:            .rela.plt
-    Type:            SHT_RELA
-    Flags:           [ SHF_ALLOC, SHF_INFO_LINK ]
-    Address:         0x400570
-    Link:            .dynsym
-    AddressAlign:    0x8
-    Info:            .got.plt
-    Relocations:
-      - Offset:          0x404000
-        Symbol:          atoi
-        Type:            R_X86_64_JUMP_SLOT
-  - Name:            .init
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
-    Address:         0x401000
-    AddressAlign:    0x4
-    Offset:          0x1000
-    Content:         F30F1EFA4883EC08488B05C92F00004885C07402FFD04883C408C3
-  - Name:            .plt
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
-    Address:         0x401020
-    AddressAlign:    0x10
-    EntSize:         0x10
-    Content:         FF35CA2F0000FF25CC2F00000F1F4000FF25CA2F00006800000000E9E0FFFFFF
-  - Name:            .text
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
-    Address:         0x401040
-    AddressAlign:    0x10
-    Content:         F30F1EFA31ED4989D15E4889E24883E4F050544531C031C948C7C740114000FF15632F0000F4662E0F1F840000000000F30F1EFAC3662E0F1F84000000000090488D3D892F0000488D05822F00004839F87415488B05362F00004885C07409FFE00F1F8000000000C30F1F8000000000488D3D592F0000488D35522F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B05052F00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803D152F0000007513554889E5E87AFFFFFFC605032F0000015DC390C366662E0F1F8400000000000F1F4000F30F1EFAEB8A662E0F1F840000000000554889E55DC3662E0F1F840000000000554889E54883EC20C745FC00000000897DF8488975F0488B45F0488B7808E8CDFEFFFF8945EC837DEC000F842E000000C745E80A000000E8B4FFFFFF837DE8000F8413000000E9000000008B45E883C0FF8945E8E9E3FFFFFFE9C8FFFFFF31C04883C4205DC3
-  - Name:            .fini
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
-    Address:         0x4011A8
-    AddressAlign:    0x4
-    Content:         F30F1EFA4883EC084883C408C3
-  - Name:            .rodata
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x402000
-    AddressAlign:    0x8
-    Offset:          0x2000
-    Content:         '01000200000000000000000000000000'
-  - Name:            .eh_frame_hdr
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x402010
-    AddressAlign:    0x4
-    Content:         011B033B340000000500000010F0FFFF7800000030F0FFFF5000000060F0FFFF6400000020F1FFFFA000000030F1FFFFC0000000
-  - Name:            .eh_frame
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x402048
-    AddressAlign:    0x8
-    Content:         1400000000000000017A5200017810011B0C070890010000100000001C000000D8EFFFFF26000000004407101000000030000000F4EFFFFF0500000000000000240000004400000090EFFFFF20000000000E10460E184A0F0B770880003F1A3B2A332422000000001C0000006C00000078F0FFFF0600000000410E108602430D06410C07080000001C0000008C00000068F0FFFF6600000000410E108602430D0602610C0708000000000000
-  - Name:            .init_array
-    Type:            SHT_INIT_ARRAY
-    Flags:           [ SHF_WRITE, SHF_ALLOC ]
-    Address:         0x403DE8
-    AddressAlign:    0x8
-    EntSize:         0x8
-    Offset:          0x2DE8
-    Content:         '2011400000000000'
-  - Name:            .fini_array
-    Type:            SHT_FINI_ARRAY
-    Flags:           [ SHF_WRITE, SHF_ALLOC ]
-    Address:         0x403DF0
-    AddressAlign:    0x8
-    EntSize:         0x8
-    Content:         F010400000000000
-  - Name:            .dynamic
-    Type:            SHT_DYNAMIC
-    Flags:           [ SHF_WRITE, SHF_ALLOC ]
-    Address:         0x403DF8
-    Link:            .dynstr
-    AddressAlign:    0x8
-    Entries:
-      - Tag:             DT_NEEDED
-        Value:           0x18
-      - Tag:             DT_INIT
-        Value:           0x401000
-      - Tag:             DT_FINI
-        Value:           0x4011A8
-      - Tag:             DT_INIT_ARRAY
-        Value:           0x403DE8
-      - Tag:             DT_INIT_ARRAYSZ
-        Value:           0x8
-      - Tag:             DT_FINI_ARRAY
-        Value:           0x403DF0
-      - Tag:             DT_FINI_ARRAYSZ
-        Value:           0x8
-      - Tag:             DT_GNU_HASH
-        Value:           0x4003A0
-      - Tag:             DT_STRTAB
-        Value:           0x400450
-      - Tag:             DT_SYMTAB
-        Value:           0x4003C0
-      - Tag:             DT_STRSZ
-        Value:           0x7E
-      - Tag:             DT_SYMENT
-        Value:           0x18
-      - Tag:             DT_DEBUG
-        Value:           0x0
-      - Tag:             DT_PLTGOT
-        Value:           0x403FE8
-      - Tag:             DT_PLTRELSZ
-        Value:           0x18
-      - Tag:             DT_PLTREL
-        Value:           0x7
-      - Tag:             DT_JMPREL
-        Value:           0x400570
-      - Tag:             DT_RELA
-        Value:           0x400510
-      - Tag:             DT_RELASZ
-        Value:           0x60
-      - Tag:             DT_RELAENT
-        Value:           0x18
-      - Tag:             DT_VERNEED
-        Value:           0x4004E0
-      - Tag:             DT_VERNEEDNUM
-        Value:           0x1
-      - Tag:             DT_VERSYM
-        Value:           0x4004CE
-      - Tag:             DT_NULL
-        Value:           0x0
-      - Tag:             DT_NULL
-        Value:           0x0
-      - Tag:             DT_NULL
-        Value:           0x0
-      - Tag:             DT_NULL
-        Value:           0x0
-      - Tag:             DT_NULL
-        Value:           0x0
-      - Tag:             DT_NULL
-        Value:           0x0
-  - Name:            .got
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_WRITE, SHF_ALLOC ]
-    Address:         0x403FC8
-    AddressAlign:    0x8
-    EntSize:         0x8
-    Content:         '0000000000000000000000000000000000000000000000000000000000000000'
-  - Name:            .got.plt
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_WRITE, SHF_ALLOC ]
-    Address:         0x403FE8
-    AddressAlign:    0x8
-    EntSize:         0x8
-    Content:         F83D400000000000000000000000000000000000000000003610400000000000
-  - Name:            .data
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_WRITE, SHF_ALLOC ]
-    Address:         0x404008
-    AddressAlign:    0x1
-    Content:         '00000000'
-  - Name:            .tm_clone_table
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_WRITE, SHF_ALLOC ]
-    Address:         0x404010
-    AddressAlign:    0x8
-  - Name:            .bss
-    Type:            SHT_NOBITS
-    Flags:           [ SHF_WRITE, SHF_ALLOC ]
-    Address:         0x404010
-    AddressAlign:    0x1
-    Size:            0x8
-  - Name:            .comment
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_MERGE, SHF_STRINGS ]
-    AddressAlign:    0x1
-    EntSize:         0x1
-    Content:         4743433A2028474E55292031312E352E302032303234303731392028526564204861742031312E352E302D3229004743433A2028474E55292031332E332E312032303234303631312028526564204861742031332E332E312D322900636C616E672076657273696F6E2031382E312E38202843656E744F532031382E312E382D332E656C392900
-  - Name:            .gnu.build.attributes
-    Type:            SHT_NOTE
-    Address:         0x406018
-    AddressAlign:    0x4
-    Notes:
-      - Name:            "GA$\x013a1"
-        Desc:            '40104000000000006610400000000000'
-        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
-      - Name:            "GA$\x013a1"
-        Desc:            '75104000000000007510400000000000'
-        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
-      - Name:            "GA$\x013a1"
-        Desc:            '00104000000000001610400000000000'
-        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
-      - Name:            "GA$\x013a1"
-        Desc:            A811400000000000B011400000000000
-        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
-      - Name:            "GA$\x013a1"
-        Desc:            '80104000000000002611400000000000'
-        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
-      - Name:            "GA$\x013a1"
-        Desc:            A611400000000000A611400000000000
-        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
-      - Name:            "GA$\x013a1"
-        Desc:            A611400000000000A611400000000000
-        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
-      - Name:            "GA$\x013a1"
-        Desc:            16104000000000001B10400000000000
-        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
-      - Name:            "GA$\x013a1"
-        Desc:            B011400000000000B511400000000000
-        Type:            NT_GNU_BUILD_ATTRIBUTE_OPEN
-  - Name:            .rela.init
-    Type:            SHT_RELA
-    Flags:           [ SHF_INFO_LINK ]
-    Link:            .symtab
-    AddressAlign:    0x8
-    Info:            .init
-    Relocations:
-      - Offset:          0x40100B
-        Symbol:          __gmon_start__
-        Type:            R_X86_64_REX_GOTPCRELX
-        Addend:          -4
-  - Name:            .rela.text
-    Type:            SHT_RELA
-    Flags:           [ SHF_INFO_LINK ]
-    Link:            .symtab
-    AddressAlign:    0x8
-    Info:            .text
-    Relocations:
-      - Offset:          0x40105B
-        Symbol:          main
-        Type:            R_X86_64_32S
-      - Offset:          0x401061
-        Symbol:          '__libc_start_main at GLIBC_2.34'
-        Type:            R_X86_64_GOTPCRELX
-        Addend:          -4
-      - Offset:          0x401083
-        Symbol:          .tm_clone_table
-        Type:            R_X86_64_PC32
-        Addend:          -4
-      - Offset:          0x40108A
-        Symbol:          __TMC_END__
-        Type:            R_X86_64_PC32
-        Addend:          -4
-      - Offset:          0x401096
-        Symbol:          _ITM_deregisterTMCloneTable
-        Type:            R_X86_64_REX_GOTPCRELX
-        Addend:          -4
-      - Offset:          0x4010B3
-        Symbol:          .tm_clone_table
-        Type:            R_X86_64_PC32
-        Addend:          -4
-      - Offset:          0x4010BA
-        Symbol:          __TMC_END__
-        Type:            R_X86_64_PC32
-        Addend:          -4
-      - Offset:          0x4010D7
-        Symbol:          _ITM_registerTMCloneTable
-        Type:            R_X86_64_REX_GOTPCRELX
-        Addend:          -4
-      - Offset:          0x4010F6
-        Symbol:          .bss
-        Type:            R_X86_64_PC32
-        Addend:          -5
-      - Offset:          0x401108
-        Symbol:          .bss
-        Type:            R_X86_64_PC32
-        Addend:          -5
-      - Offset:          0x40115F
-        Symbol:          'atoi at GLIBC_2.2.5'
-        Type:            R_X86_64_PLT32
-        Addend:          -4
-      - Offset:          0x401178
-        Symbol:          foo
-        Type:            R_X86_64_PLT32
-        Addend:          -4
-  - Name:            .rela.eh_frame
-    Type:            SHT_RELA
-    Flags:           [ SHF_INFO_LINK ]
-    Link:            .symtab
-    AddressAlign:    0x8
-    Info:            .eh_frame
-    Relocations:
-      - Offset:          0x402068
-        Symbol:          .text
-        Type:            R_X86_64_PC32
-      - Offset:          0x40207C
-        Symbol:          .text
-        Type:            R_X86_64_PC32
-        Addend:          48
-      - Offset:          0x4020B8
-        Symbol:          .text
-        Type:            R_X86_64_PC32
-        Addend:          240
-      - Offset:          0x4020D8
-        Symbol:          .text
-        Type:            R_X86_64_PC32
-        Addend:          256
-  - Name:            .rela.init_array
-    Type:            SHT_RELA
-    Flags:           [ SHF_INFO_LINK ]
-    Link:            .symtab
-    AddressAlign:    0x8
-    Info:            .init_array
-    Relocations:
-      - Offset:          0x403DE8
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          224
-  - Name:            .rela.fini_array
-    Type:            SHT_RELA
-    Flags:           [ SHF_INFO_LINK ]
-    Link:            .symtab
-    AddressAlign:    0x8
-    Info:            .fini_array
-    Relocations:
-      - Offset:          0x403DF0
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          176
-  - Name:            .rela.gnu.build.attributes
-    Type:            SHT_RELA
-    Flags:           [ SHF_INFO_LINK ]
-    Link:            .symtab
-    AddressAlign:    0x8
-    Info:            .gnu.build.attributes
-    Relocations:
-      - Offset:          0x40602C
-        Symbol:          .text
-        Type:            R_X86_64_64
-      - Offset:          0x406034
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          38
-      - Offset:          0x406050
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          53
-      - Offset:          0x406058
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          53
-      - Offset:          0x406074
-        Symbol:          .init
-        Type:            R_X86_64_64
-      - Offset:          0x40607C
-        Symbol:          .init
-        Type:            R_X86_64_64
-        Addend:          22
-      - Offset:          0x406098
-        Symbol:          .fini
-        Type:            R_X86_64_64
-      - Offset:          0x4060A0
-        Symbol:          .fini
-        Type:            R_X86_64_64
-        Addend:          8
-      - Offset:          0x4060BC
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          64
-      - Offset:          0x4060C4
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          230
-      - Offset:          0x4060E0
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          358
-      - Offset:          0x4060E8
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          358
-      - Offset:          0x406104
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          358
-      - Offset:          0x40610C
-        Symbol:          .text
-        Type:            R_X86_64_64
-        Addend:          358
-      - Offset:          0x406128
-        Symbol:          .init
-        Type:            R_X86_64_64
-        Addend:          22
-      - Offset:          0x406130
-        Symbol:          .init
-        Type:            R_X86_64_64
-        Addend:          27
-      - Offset:          0x40614C
-        Symbol:          .fini
-        Type:            R_X86_64_64
-        Addend:          8
-      - Offset:          0x406154
-        Symbol:          .fini
-        Type:            R_X86_64_64
-        Addend:          13
-  - Type:            SectionHeaderTable
-    Sections:
-      - Name:            .interp
-      - Name:            .note.gnu.property
-      - Name:            .note.gnu.build-id
-      - Name:            .note.ABI-tag
-      - Name:            .gnu.hash
-      - Name:            .dynsym
-      - Name:            .dynstr
-      - Name:            .gnu.version
-      - Name:            .gnu.version_r
-      - Name:            .rela.dyn
-      - Name:            .rela.plt
-      - Name:            .init
-      - Name:            .rela.init
-      - Name:            .plt
-      - Name:            .text
-      - Name:            .rela.text
-      - Name:            .fini
-      - Name:            .rodata
-      - Name:            .eh_frame_hdr
-      - Name:            .eh_frame
-      - Name:            .rela.eh_frame
-      - Name:            .init_array
-      - Name:            .rela.init_array
-      - Name:            .fini_array
-      - Name:            .rela.fini_array
-      - Name:            .dynamic
-      - Name:            .got
-      - Name:            .got.plt
-      - Name:            .data
-      - Name:            .tm_clone_table
-      - Name:            .bss
-      - Name:            .comment
-      - Name:            .gnu.build.attributes
-      - Name:            .rela.gnu.build.attributes
-      - Name:            .symtab
-      - Name:            .strtab
-      - Name:            .shstrtab
-Symbols:
-  - Name:            .interp
-    Type:            STT_SECTION
-    Section:         .interp
-    Value:           0x400318
-  - Name:            .note.gnu.property
-    Type:            STT_SECTION
-    Section:         .note.gnu.property
-    Value:           0x400338
-  - Name:            .note.gnu.build-id
-    Type:            STT_SECTION
-    Section:         .note.gnu.build-id
-    Value:           0x400358
-  - Name:            .note.ABI-tag
-    Type:            STT_SECTION
-    Section:         .note.ABI-tag
-    Value:           0x40037C
-  - Name:            .gnu.hash
-    Type:            STT_SECTION
-    Section:         .gnu.hash
-    Value:           0x4003A0
-  - Name:            .dynsym
-    Type:            STT_SECTION
-    Section:         .dynsym
-    Value:           0x4003C0
-  - Name:            .dynstr
-    Type:            STT_SECTION
-    Section:         .dynstr
-    Value:           0x400450
-  - Name:            .gnu.version
-    Type:            STT_SECTION
-    Section:         .gnu.version
-    Value:           0x4004CE
-  - Name:            .gnu.version_r
-    Type:            STT_SECTION
-    Section:         .gnu.version_r
-    Value:           0x4004E0
-  - Name:            .rela.dyn
-    Type:            STT_SECTION
-    Section:         .rela.dyn
-    Value:           0x400510
-  - Name:            .rela.plt
-    Type:            STT_SECTION
-    Section:         .rela.plt
-    Value:           0x400570
-  - Name:            .init
-    Type:            STT_SECTION
-    Section:         .init
-    Value:           0x401000
-  - Name:            .plt
-    Type:            STT_SECTION
-    Section:         .plt
-    Value:           0x401020
-  - Name:            .text
-    Type:            STT_SECTION
-    Section:         .text
-    Value:           0x401040
-  - Name:            .fini
-    Type:            STT_SECTION
-    Section:         .fini
-    Value:           0x4011A8
-  - Name:            .rodata
-    Type:            STT_SECTION
-    Section:         .rodata
-    Value:           0x402000
-  - Name:            .eh_frame_hdr
-    Type:            STT_SECTION
-    Section:         .eh_frame_hdr
-    Value:           0x402010
-  - Name:            .eh_frame
-    Type:            STT_SECTION
-    Section:         .eh_frame
-    Value:           0x402048
-  - Name:            .init_array
-    Type:            STT_SECTION
-    Section:         .init_array
-    Value:           0x403DE8
-  - Name:            .fini_array
-    Type:            STT_SECTION
-    Section:         .fini_array
-    Value:           0x403DF0
-  - Name:            .dynamic
-    Type:            STT_SECTION
-    Section:         .dynamic
-    Value:           0x403DF8
-  - Name:            .got
-    Type:            STT_SECTION
-    Section:         .got
-    Value:           0x403FC8
-  - Name:            .got.plt
-    Type:            STT_SECTION
-    Section:         .got.plt
-    Value:           0x403FE8
-  - Name:            .data
-    Type:            STT_SECTION
-    Section:         .data
-    Value:           0x404008
-  - Name:            .tm_clone_table
-    Type:            STT_SECTION
-    Section:         .tm_clone_table
-    Value:           0x404010
-  - Name:            .bss
-    Type:            STT_SECTION
-    Section:         .bss
-    Value:           0x404010
-  - Name:            .comment
-    Type:            STT_SECTION
-    Section:         .comment
-  - Name:            .gnu.build.attributes
-    Type:            STT_SECTION
-    Section:         .gnu.build.attributes
-    Value:           0x406018
-  - Name:            crt1.o
-    Type:            STT_FILE
-    Index:           SHN_ABS
-  - Name:            __abi_tag
-    Type:            STT_OBJECT
-    Section:         .note.ABI-tag
-    Value:           0x40037C
-    Size:            0x20
-  - Name:            crtstuff.c
-    Type:            STT_FILE
-    Index:           SHN_ABS
-  - Name:            __TMC_LIST__
-    Type:            STT_OBJECT
-    Section:         .tm_clone_table
-    Value:           0x404010
-  - Name:            deregister_tm_clones
-    Type:            STT_FUNC
-    Section:         .text
-    Value:           0x401080
-  - Name:            register_tm_clones
-    Type:            STT_FUNC
-    Section:         .text
-    Value:           0x4010B0
-  - Name:            __do_global_dtors_aux
-    Type:            STT_FUNC
-    Section:         .text
-    Value:           0x4010F0
-  - Name:            completed.0
-    Type:            STT_OBJECT
-    Section:         .bss
-    Value:           0x404010
-    Size:            0x1
-  - Name:            __do_global_dtors_aux_fini_array_entry
-    Type:            STT_OBJECT
-    Section:         .fini_array
-    Value:           0x403DF0
-  - Name:            frame_dummy
-    Type:            STT_FUNC
-    Section:         .text
-    Value:           0x401120
-  - Name:            __frame_dummy_init_array_entry
-    Type:            STT_OBJECT
-    Section:         .init_array
-    Value:           0x403DE8
-  - Name:            callcont-fallthru.c
-    Type:            STT_FILE
-    Index:           SHN_ABS
-  - Name:            'crtstuff.c (1)'
-    Type:            STT_FILE
-    Index:           SHN_ABS
-  - Name:            __FRAME_END__
-    Type:            STT_OBJECT
-    Section:         .eh_frame
-    Value:           0x4020F0
-  - Type:            STT_FILE
-    Index:           SHN_ABS
-  - Name:            _DYNAMIC
-    Type:            STT_OBJECT
-    Section:         .dynamic
-    Value:           0x403DF8
-  - Name:            __GNU_EH_FRAME_HDR
-    Section:         .eh_frame_hdr
-    Value:           0x402010
-  - Name:            _GLOBAL_OFFSET_TABLE_
-    Type:            STT_OBJECT
-    Section:         .got.plt
-    Value:           0x403FE8
-  - Name:            '__libc_start_main at GLIBC_2.34'
-    Type:            STT_FUNC
-    Binding:         STB_GLOBAL
-  - Name:            _ITM_deregisterTMCloneTable
-    Binding:         STB_WEAK
-  - Name:            data_start
-    Section:         .data
-    Binding:         STB_WEAK
-    Value:           0x404008
-  - Name:            _edata
-    Section:         .tm_clone_table
-    Binding:         STB_GLOBAL
-    Value:           0x404010
-  - Name:            _fini
-    Type:            STT_FUNC
-    Section:         .fini
-    Binding:         STB_GLOBAL
-    Value:           0x4011A8
-    Other:           [ STV_HIDDEN ]
-  - Name:            __data_start
-    Section:         .data
-    Binding:         STB_GLOBAL
-    Value:           0x404008
-  - Name:            __gmon_start__
-    Binding:         STB_WEAK
-  - Name:            __dso_handle
-    Type:            STT_OBJECT
-    Section:         .rodata
-    Binding:         STB_GLOBAL
-    Value:           0x402008
-    Other:           [ STV_HIDDEN ]
-  - Name:            _IO_stdin_used
-    Type:            STT_OBJECT
-    Section:         .rodata
-    Binding:         STB_GLOBAL
-    Value:           0x402000
-    Size:            0x4
-  - Name:            foo
-    Type:            STT_FUNC
-    Section:         .text
-    Binding:         STB_GLOBAL
-    Value:           0x401130
-    Size:            0x6
-  - Name:            _end
-    Section:         .bss
-    Binding:         STB_GLOBAL
-    Value:           0x404018
-  - Name:            _dl_relocate_static_pie
-    Type:            STT_FUNC
-    Section:         .text
-    Binding:         STB_GLOBAL
-    Value:           0x401070
-    Size:            0x5
-    Other:           [ STV_HIDDEN ]
-  - Name:            _start
-    Type:            STT_FUNC
-    Section:         .text
-    Binding:         STB_GLOBAL
-    Value:           0x401040
-    Size:            0x26
-  - Name:            __bss_start
-    Section:         .bss
-    Binding:         STB_GLOBAL
-    Value:           0x404010
-  - Name:            main
-    Type:            STT_FUNC
-    Section:         .text
-    Binding:         STB_GLOBAL
-    Value:           0x401140
-    Size:            0x66
-  - Name:            'atoi at GLIBC_2.2.5'
-    Type:            STT_FUNC
-    Binding:         STB_GLOBAL
-  - Name:            __TMC_END__
-    Type:            STT_OBJECT
-    Section:         .tm_clone_table
-    Binding:         STB_GLOBAL
-    Value:           0x404010
-    Other:           [ STV_HIDDEN ]
-  - Name:            _ITM_registerTMCloneTable
-    Binding:         STB_WEAK
-  - Name:            _init
-    Type:            STT_FUNC
-    Section:         .init
-    Binding:         STB_GLOBAL
-    Value:           0x401000
-    Other:           [ STV_HIDDEN ]
-DynamicSymbols:
-  - Name:            __libc_start_main
-    Type:            STT_FUNC
-    Binding:         STB_GLOBAL
-  - Name:            _ITM_deregisterTMCloneTable
-    Binding:         STB_WEAK
-  - Name:            __gmon_start__
-    Binding:         STB_WEAK
-  - Name:            atoi
-    Type:            STT_FUNC
-    Binding:         STB_GLOBAL
-  - Name:            _ITM_registerTMCloneTable
-    Binding:         STB_WEAK
-...
diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s
new file mode 100644
index 00000000000000..0a59d799f1ef3c
--- /dev/null
+++ b/bolt/test/X86/callcont-fallthru.s
@@ -0,0 +1,60 @@
+## Ensures that a call continuation fallthrough count is set when using
+## pre-aggregated perf data.
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: link_fdata %s %t.exe %t.pa PREAGG
+# RUN: llvm-strip --strip-unneeded %t.exe
+# RUN: llvm-bolt %t.exe --pa -p %t.pa -o %t.out \
+# RUN:   --print-cfg --print-only=main | FileCheck %s
+
+  .globl foo
+  .type foo, %function
+foo:
+	pushq	%rbp
+	movq	%rsp, %rbp
+	popq	%rbp
+Lfoo_ret:
+	retq
+.size foo, .-foo
+
+  .globl main
+  .type main, %function
+main:
+	pushq	%rbp
+	movq	%rsp, %rbp
+	subq	$0x20, %rsp
+	movl	$0x0, -0x4(%rbp)
+	movl	%edi, -0x8(%rbp)
+	movq	%rsi, -0x10(%rbp)
+	movq	-0x10(%rbp), %rax
+	movq	0x8(%rax), %rdi
+	movl	%eax, -0x14(%rbp)
+
+Ltmp4:
+	cmpl	$0x0, -0x14(%rbp)
+	je	Ltmp0
+
+	movl	$0xa, -0x18(%rbp)
+	callq	foo
+# PREAGG: B #Lfoo_ret# #Ltmp3# 1 0
+# CHECK:      callq foo
+# CHECK-NEXT: count: 1
+
+Ltmp3:
+	cmpl	$0x0, -0x18(%rbp)
+	jmp	Ltmp2
+
+Ltmp2:
+	movl	-0x18(%rbp), %eax
+	addl	$-0x1, %eax
+	movl	%eax, -0x18(%rbp)
+	jmp	Ltmp3
+	jmp	Ltmp4
+
+Ltmp0:
+	xorl	%eax, %eax
+	addq	$0x20, %rsp
+	popq	%rbp
+	retq
+.size main, .-main
diff --git a/bolt/test/X86/callcont-fallthru.test b/bolt/test/X86/callcont-fallthru.test
deleted file mode 100644
index e0a5c5a6852d42..00000000000000
--- a/bolt/test/X86/callcont-fallthru.test
+++ /dev/null
@@ -1,9 +0,0 @@
-## Reproduces missing call continuation fallthrough count when using
-## pre-aggregated perf data
-
-# RUN: yaml2obj %p/Inputs/callcont-fallthru.yaml > %t.exe
-# RUN: llvm-bolt %t.exe --pa -p %p/Inputs/callcont-fallthru.preagg -o %t.out \
-# RUN:   --print-cfg --print-only=main | FileCheck %s
-
-# CHECK:      callq foo
-# CHECK-NEXT: count: 103204

>From 06fe34d8817695c6769f199f5ac386ac2e872af4 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 21 Oct 2024 12:58:49 -0700
Subject: [PATCH 6/8] Added plt call (return from external location) test case

Created using spr 1.3.4
---
 bolt/test/X86/callcont-fallthru.s | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s
index 0a59d799f1ef3c..d10373720a6606 100644
--- a/bolt/test/X86/callcont-fallthru.s
+++ b/bolt/test/X86/callcont-fallthru.s
@@ -27,6 +27,12 @@ main:
 	movl	$0x0, -0x4(%rbp)
 	movl	%edi, -0x8(%rbp)
 	movq	%rsi, -0x10(%rbp)
+	callq	puts@PLT
+# PREAGG: B X:0 #Ltmp1# 2 0
+# CHECK:      callq puts@PLT
+# CHECK-NEXT: count: 2
+
+Ltmp1:
 	movq	-0x10(%rbp), %rax
 	movq	0x8(%rax), %rdi
 	movl	%eax, -0x14(%rbp)
@@ -51,6 +57,7 @@ Ltmp2:
 	movl	%eax, -0x18(%rbp)
 	jmp	Ltmp3
 	jmp	Ltmp4
+	jmp	Ltmp1
 
 Ltmp0:
 	xorl	%eax, %eax

>From e8ec9c936c31ae0ec0a56a224860045c0a2885a1 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Mon, 21 Oct 2024 14:32:11 -0700
Subject: [PATCH 7/8] Add test for getFallthroughsInTrace

Created using spr 1.3.4
---
 bolt/test/X86/callcont-fallthru.s | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s
index d10373720a6606..1feb283edb2217 100644
--- a/bolt/test/X86/callcont-fallthru.s
+++ b/bolt/test/X86/callcont-fallthru.s
@@ -4,10 +4,16 @@
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
 # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
 # RUN: link_fdata %s %t.exe %t.pa PREAGG
+# RUN: link_fdata %s %t.exe %t.pa2 PREAGG2
 # RUN: llvm-strip --strip-unneeded %t.exe
 # RUN: llvm-bolt %t.exe --pa -p %t.pa -o %t.out \
 # RUN:   --print-cfg --print-only=main | FileCheck %s
 
+## Check that getFallthroughsInTrace correctly handles a trace starting at plt
+## call continuation
+# RUN: llvm-bolt %t.exe --pa -p %t.pa2 -o %t.out2 \
+# RUN:   --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2
+
   .globl foo
   .type foo, %function
 foo:
@@ -32,6 +38,9 @@ main:
 # CHECK:      callq puts@PLT
 # CHECK-NEXT: count: 2
 
+# CHECK2:      callq puts@PLT
+# CHECK2-NEXT: count: 0
+
 Ltmp1:
 	movq	-0x10(%rbp), %rax
 	movq	0x8(%rax), %rdi
@@ -40,6 +49,8 @@ Ltmp1:
 Ltmp4:
 	cmpl	$0x0, -0x14(%rbp)
 	je	Ltmp0
+# CHECK2:      je .Ltmp0
+# CHECK2-NEXT: count: 3
 
 	movl	$0xa, -0x18(%rbp)
 	callq	foo
@@ -47,8 +58,13 @@ Ltmp4:
 # CHECK:      callq foo
 # CHECK-NEXT: count: 1
 
+# PREAGG2: F #Ltmp1# #Ltmp3_br# 3
+# CHECK2:      callq foo
+# CHECK2-NEXT: count: 3
+
 Ltmp3:
 	cmpl	$0x0, -0x18(%rbp)
+Ltmp3_br:
 	jmp	Ltmp2
 
 Ltmp2:

>From 9ac54dd2a2fd0e9b942b3a6399133d0428652b18 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sun, 27 Oct 2024 11:43:47 -0700
Subject: [PATCH 8/8] Use return profile conversion for pre-aggregated profile
 only

Created using spr 1.3.4
---
 bolt/include/bolt/Profile/DataAggregator.h |  3 ++-
 bolt/lib/Profile/DataAggregator.cpp        | 27 ++++++++++++++++++----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6453b3070ceb8d..2880bfd03be789 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -266,7 +266,8 @@ class DataAggregator : public DataReader {
                      uint64_t Mispreds);
 
   /// Register a \p Branch.
-  bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds);
+  bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds,
+                bool IsPreagg);
 
   /// Register a trace between two LBR entries supplied in execution order.
   bool doTrace(const LBREntry &First, const LBREntry &Second,
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index fe371ef3ca1050..b1cd9db9fc481f 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -774,7 +774,7 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
 }
 
 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
-                              uint64_t Mispreds) {
+                              uint64_t Mispreds, bool IsPreagg) {
   // Returns whether \p Offset in \p Func contains a return instruction.
   auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
     auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
@@ -846,7 +846,7 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
     return false;
 
   // Record call to continuation trace.
-  if (IsCallCont && FromFunc != ToFunc) {
+  if (IsPreagg && IsCallCont && FromFunc != ToFunc) {
     LBREntry First{ToOrig - 1, ToOrig - 1, false};
     LBREntry Second{ToOrig, ToOrig, false};
     return doTrace(First, Second, Count);
@@ -956,6 +956,24 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF,
   if (!FromBB || !ToBB)
     return std::nullopt;
 
+  // Adjust FromBB if the first LBR is a return from the last instruction in
+  // the previous block (that instruction should be a call).
+  if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) &&
+      !FromBB->isEntryPoint() && !FromBB->isLandingPad()) {
+    const BinaryBasicBlock *PrevBB =
+        BF.getLayout().getBlock(FromBB->getIndex() - 1);
+    if (PrevBB->getSuccessor(FromBB->getLabel())) {
+      const MCInst *Instr = PrevBB->getLastNonPseudoInstr();
+      if (Instr && BC.MIB->isCall(*Instr))
+        FromBB = PrevBB;
+      else
+        LLVM_DEBUG(dbgs() << "invalid incoming LBR (no call): " << FirstLBR
+                          << '\n');
+    } else {
+      LLVM_DEBUG(dbgs() << "invalid incoming LBR: " << FirstLBR << '\n');
+    }
+  }
+
   // Fill out information for fall-through edges. The From and To could be
   // within the same basic block, e.g. when two call instructions are in the
   // same block. In this case we skip the processing.
@@ -1646,7 +1664,8 @@ void DataAggregator::processBranchEvents() {
   for (const auto &AggrLBR : BranchLBRs) {
     const Trace &Loc = AggrLBR.first;
     const TakenBranchInfo &Info = AggrLBR.second;
-    doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
+    doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount,
+             /*IsPreagg=*/false);
   }
 }
 
@@ -1807,7 +1826,7 @@ void DataAggregator::processPreAggregated() {
     switch (AggrEntry.EntryType) {
     case AggregatedLBREntry::BRANCH:
       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
-               AggrEntry.Mispreds);
+               AggrEntry.Mispreds, /*IsPreagg=*/true);
       break;
     case AggregatedLBREntry::FT:
     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {



More information about the llvm-commits mailing list