<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style><![endif]--><style><!--
/* Font Definitions */
@font-face
{font-family:Wingdings;
panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:DengXian;
panose-1:2 1 6 0 3 1 1 1 1 1;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
{font-family:Consolas;
panose-1:2 11 6 9 2 2 4 3 2 4;}
@font-face
{font-family:"\@DengXian";
panose-1:2 1 6 0 3 1 1 1 1 1;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0in;
margin-bottom:.0001pt;
font-size:11.0pt;
font-family:"Calibri",sans-serif;}
h1
{mso-style-priority:9;
mso-style-link:"Heading 1 Char";
mso-margin-top-alt:auto;
margin-right:0in;
mso-margin-bottom-alt:auto;
margin-left:0in;
font-size:24.0pt;
font-family:"Calibri",sans-serif;
font-weight:bold;}
h2
{mso-style-priority:9;
mso-style-link:"Heading 2 Char";
mso-margin-top-alt:auto;
margin-right:0in;
mso-margin-bottom-alt:auto;
margin-left:0in;
font-size:18.0pt;
font-family:"Calibri",sans-serif;
font-weight:bold;}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:blue;
text-decoration:underline;}
span.Heading1Char
{mso-style-name:"Heading 1 Char";
mso-style-priority:9;
mso-style-link:"Heading 1";
font-family:"Calibri Light",sans-serif;
color:#2F5496;}
span.Heading2Char
{mso-style-name:"Heading 2 Char";
mso-style-priority:9;
mso-style-link:"Heading 2";
font-family:"Calibri Light",sans-serif;
color:#2F5496;}
span.EmailStyle21
{mso-style-type:personal-reply;
font-family:"Calibri",sans-serif;
color:windowtext;}
.MsoChpDefault
{mso-style-type:export-only;
font-size:10.0pt;}
@page WordSection1
{size:8.5in 11.0in;
margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
{page:WordSection1;}
/* List Definitions */
@list l0
{mso-list-id:952786928;
mso-list-template-ids:-2098930000;}
@list l1
{mso-list-id:990642305;
mso-list-template-ids:278691566;}
@list l1:level1
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l1:level2
{mso-level-number-format:bullet;
mso-level-text:o;
mso-level-tab-stop:1.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:"Courier New";
mso-bidi-font-family:"Times New Roman";}
@list l1:level3
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:1.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l1:level4
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:2.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l1:level5
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:2.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l1:level6
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:3.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l1:level7
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:3.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l1:level8
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:4.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l1:level9
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:4.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l2
{mso-list-id:1012949738;
mso-list-type:hybrid;
mso-list-template-ids:108174198 2102983346 67698691 67698693 67698689 67698691 67698693 67698689 67698691 67698693;}
@list l2:level1
{mso-level-start-at:0;
mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-.25in;
font-family:Wingdings;
mso-fareast-font-family:DengXian;
mso-bidi-font-family:"Times New Roman";}
@list l2:level2
{mso-level-number-format:bullet;
mso-level-text:o;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-.25in;
font-family:"Courier New";}
@list l2:level3
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-.25in;
font-family:Wingdings;}
@list l2:level4
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-.25in;
font-family:Symbol;}
@list l2:level5
{mso-level-number-format:bullet;
mso-level-text:o;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-.25in;
font-family:"Courier New";}
@list l2:level6
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-.25in;
font-family:Wingdings;}
@list l2:level7
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-.25in;
font-family:Symbol;}
@list l2:level8
{mso-level-number-format:bullet;
mso-level-text:o;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-.25in;
font-family:"Courier New";}
@list l2:level9
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:none;
mso-level-number-position:left;
text-indent:-.25in;
font-family:Wingdings;}
@list l3
{mso-list-id:1066487212;
mso-list-template-ids:606871936;}
@list l4
{mso-list-id:1434667973;
mso-list-template-ids:-20008072;}
@list l4:level1
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l4:level2
{mso-level-number-format:bullet;
mso-level-text:o;
mso-level-tab-stop:1.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:"Courier New";
mso-bidi-font-family:"Times New Roman";}
@list l4:level3
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:1.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l4:level4
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:2.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l4:level5
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:2.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l4:level6
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:3.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l4:level7
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:3.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l4:level8
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:4.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l4:level9
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:4.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l5
{mso-list-id:1719933417;
mso-list-template-ids:-1962400152;}
@list l5:level1
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Symbol;}
@list l5:level2
{mso-level-number-format:bullet;
mso-level-text:o;
mso-level-tab-stop:1.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:"Courier New";
mso-bidi-font-family:"Times New Roman";}
@list l5:level3
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:1.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l5:level4
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:2.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l5:level5
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:2.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l5:level6
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:3.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l5:level7
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:3.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l5:level8
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:4.0in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
@list l5:level9
{mso-level-number-format:bullet;
mso-level-text:;
mso-level-tab-stop:4.5in;
mso-level-number-position:left;
text-indent:-.25in;
mso-ansi-font-size:10.0pt;
font-family:Wingdings;}
ol
{margin-bottom:0in;}
ul
{margin-bottom:0in;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-US" link="blue" vlink="purple">
<div class="WordSection1">
<p class="MsoNormal">Cool stuff – nice to see a late splitting pass in LLVM. <o:p>
</o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal">&gt; <span style="font-family:'Arial',sans-serif;color:#202124">
Full Propeller optimizations include function splitting and layout optimizations, however it requires an additional round of profiling using perf on top of the peak (FDO/CSFDO + ThinLTO) binary. In this work we experiment with applying function splitting using
the instrumented profile in the build instead of adding an additional round of profiling.<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Arial",sans-serif;color:#202124"><o:p> </o:p></span></p>
<p class="MsoNormal">I’d expect Propeller or BOLT to be more effective at doing this due to a better post-inline profile. Of course, the usability advantage of not needing a separate profile is very practical, but I’m just wondering: did you see profile quality getting
in the way here? <o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal">&gt; <span style="font-family:'Arial',sans-serif;color:black">uses existing instrumentation based FDO or CSFDO profile information.<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Arial",sans-serif;color:black"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Arial",sans-serif;color:black">Similarly, with instrumentation FDO alone, the post-inline profile may not be accurate, so for this splitting, is it more effective when used with CSFDO? Was the evaluation result
from FDO or CSFDO?<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Arial",sans-serif;color:black"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Arial",sans-serif;color:black">Also wondering does this work with Sample FDO, and do you have numbers that you can share when used with Sample FDO?<o:p></o:p></span></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal">Thanks,<o:p></o:p></p>
<p class="MsoNormal">Wenlei<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in">
<p class="MsoNormal"><b><span style="font-size:12.0pt;color:black">From: </span></b><span style="font-size:12.0pt;color:black">llvm-dev &lt;llvm-dev-bounces@lists.llvm.org&gt; on behalf of Snehasish Kumar via llvm-dev &lt;llvm-dev@lists.llvm.org&gt;<br>
<b>Reply-To: </b>Snehasish Kumar &lt;snehasishk@google.com&gt;<br>
<b>Date: </b>Tuesday, August 4, 2020 at 5:41 PM<br>
<b>To: </b>llvm-dev &lt;llvm-dev@lists.llvm.org&gt;, David Li &lt;davidxl@google.com&gt;, Eric Christopher &lt;echristo@google.com&gt;, Sriraman Tallam &lt;tmsriram@google.com&gt;, aditya kumar &lt;hiraditya@gmail.com&gt;, "efriedma@codeaurora.org" &lt;efriedma@codeaurora.org&gt;<br>
<b>Subject: </b>[llvm-dev] [RFC] Machine Function Splitter - Split out cold blocks from machine functions using profile data<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<div>
<h1 style="mso-margin-top-alt:20.0pt;margin-right:0in;margin-bottom:6.0pt;margin-left:0in;text-align:justify">
<span style="font-size:11.0pt;font-family:"Arial",sans-serif;color:black;font-weight:normal">Greetings,</span><o:p></o:p></h1>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">We present “Machine Function Splitter”, a codegen optimization pass which splits functions into hot and cold parts. This pass leverages the
basic block sections feature recently introduced in LLVM from the Propeller project. The pass targets functions with profile coverage, identifies cold blocks and moves them to a separate section. The linker groups all cold blocks across functions together,
decreasing fragmentation and improving icache and itlb utilization. Our experiments show >2% performance improvement on clang bootstrap, ~1% improvement on Google workloads and 1.6% mean performance improvement on SPEC IntRate 2017.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<h2 style="mso-margin-top-alt:.25in;margin-right:0in;margin-bottom:6.0pt;margin-left:0in;text-align:justify">
<span style="font-size:16.0pt;font-family:"Arial",sans-serif;color:black;font-weight:normal">Motivation</span><o:p></o:p></h2>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">Recent work at Google has shown that aggressive, profile-driven inlining for performance has led to significant code bloat and icache fragmentation
(</span><span style="font-size:12.0pt"><a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__research.google_pubs_pub48320_&d=DwMFaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=KfYo542rDdZQGClmgz-RBw&m=-cUmMKRcOXZHF-PpVxO_Dfg2mkIgP4L_QomIwDizeEE&s=5M8b2TUFoHuEJUjJjNRGvPxOEE0ktBbRfJVCoGAW4BQ&e="><span style="font-size:11.0pt;font-family:"Arial",sans-serif">AsmDB
- Ayers et al ‘2019</span></a></span><span style="font-family:"Arial",sans-serif;color:black">). We find that most functions 5 KiB or larger have inlined children more than 10 layers deep bringing in exponentially more code at each inline level, not all of
which is necessarily hot. Generally, in roughly half of even the hottest functions, more than 50% of the code bytes are never executed, but likely to be in the cache.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:'Arial',sans-serif;color:black">Function splitting is a well-known compiler transformation primarily targeting improved code locality to improve performance. LLVM has a
middle-end, target agnostic </span><span style="font-size:12.0pt"><a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__llvm.org_devmtg_2019-2D10_slides_Kumar-2DHotColdSplitting.pdf&d=DwMFaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=KfYo542rDdZQGClmgz-RBw&m=-cUmMKRcOXZHF-PpVxO_Dfg2mkIgP4L_QomIwDizeEE&s=xfh7APIZXGJikzsEVba8f1JsDG3aMqQCrlbanFKDvsI&e="><span style="font-size:11.0pt;font-family:"Arial",sans-serif">hot
cold splitting pass</span></a></span><span style="font-family:"Arial",sans-serif;color:black"> as well as a
</span><span style="font-size:12.0pt"><a href="https://github.com/llvm/llvm-project/blob/master/llvm/lib/Transforms/IPO/PartialInlining.cpp"><span style="font-size:11.0pt;font-family:"Arial",sans-serif">partial inlining pass</span></a></span><span style="font-family:"Arial",sans-serif;color:black">
which performs similar transformations, as noted by the authors in </span><span style="font-size:12.0pt"><a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.llvm.org_pipermail_llvm-2Ddev_2020-2DJune_142429.html&d=DwMFaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=KfYo542rDdZQGClmgz-RBw&m=-cUmMKRcOXZHF-PpVxO_Dfg2mkIgP4L_QomIwDizeEE&s=dBjqHuzX7OwOwfdAUeWcmpcImYHy9ga6uGzqcdWFgAM&e="><span style="font-size:11.0pt;font-family:"Arial",sans-serif">a
recent email thread</span></a></span><span style="font-family:"Arial",sans-serif;color:black">. However, due to the timing of the respective passes as well as the code extraction techniques employed, the overall gains on large, complex applications leave headroom
for improvement. By deferring function splitting to the codegen phase we can maximize the opportunity to remove cold code as well as refine the code extraction technique. Furthermore, by performing function splitting very late, earlier passes can perform more
aggressive optimizations.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<h2 style="mso-margin-top-alt:.25in;margin-right:0in;margin-bottom:6.0pt;margin-left:0in;text-align:justify">
<span style="font-size:16.0pt;font-family:"Arial",sans-serif;color:black;font-weight:normal">Implementation</span><o:p></o:p></h2>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">We propose a new machine function splitting pass which leverages the
</span><span style="font-size:12.0pt"><a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__reviews.llvm.org_D68063&d=DwMFaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=KfYo542rDdZQGClmgz-RBw&m=-cUmMKRcOXZHF-PpVxO_Dfg2mkIgP4L_QomIwDizeEE&s=kQjqw1Wv-ojIDbl02uxcO-f40lom71ytRXVVHp1WbNI&e="><span style="font-size:11.0pt;font-family:"Arial",sans-serif">basic
block sections feature</span></a></span><span style="font-family:"Arial",sans-serif;color:black"> to split functions without the caveats of code extraction in the middle-end. The pass uses profile information to identify cold basic blocks very late in LLVM
CodeGen, after regalloc and all other machine passes have executed. This allows our implementation to be precise in its assessment of cold regions while maximizing opportunity.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">Each function is split into two parts. The hot cluster includes the function entry and all blocks which are not cold. All the cold blocks
are grouped together as a </span><span style="font-size:12.0pt"><a href="https://github.com/llvm/llvm-project/blob/5934df0c9abe94fc450fbcf0ceca21cf838840e9/llvm/include/llvm/CodeGen/MachineBasicBlock.h#L63"><span style="font-size:11.0pt;font-family:"Arial",sans-serif">Cold
Section cluster</span></a></span><span style="font-family:"Arial",sans-serif;color:black">. With basic block sections, the cold blocks are assigned appropriate debug and call frame information and emitted as part of the .text.unlikely section. Unlike
</span><span style="font-size:12.0pt"><a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.llvm.org_pipermail_llvm-2Ddev_2019-2DSeptember_135393.html&d=DwMFaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=KfYo542rDdZQGClmgz-RBw&m=-cUmMKRcOXZHF-PpVxO_Dfg2mkIgP4L_QomIwDizeEE&s=y0u_TamS9xnHRAQVD1cDCxl-AzE-QbTNmnYU73oxxFE&e="><span style="font-size:11.0pt;font-family:"Arial",sans-serif">Propeller</span></a></span><span style="font-family:"Arial",sans-serif;color:black">,
which is presently the main user of the basic block sections feature, this pass does not require an additional round of profiling and uses existing instrumentation based FDO or CSFDO profile information.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<div>
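<p class="MsoNormal"><span style="font-size:12.0pt"><img border="0" width="770" height="577" style="width:8.0208in;height:6.0104in" id="_x0000_i1025" src="cid:image001.png@01D66C53.5D008990" alt="Layout of functions foo and bar before and after machine function splitting: cold blocks 5 and E are moved to a separate section"><o:p></o:p></span></p>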
</div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">In the illustration above, the functions foo and bar contain a cold block each, index 5 and E respectively. We show a possible layout for
these functions which optimizes for fall throughs. Note that all the blocks are kept in a contiguous region described by the symbols foo and bar. Using the machine function splitter, the cold blocks (5 and E) are moved to a separate section. These blocks can
then be grouped along with other cold blocks (and functions) in a separate output section in the final binary. The key highlights of this approach are:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<ul style="margin-top:0in" type="disc">
<li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l5 level1 lfo1;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Profile driven, profile type agnostic approach.<o:p></o:p></span></li><li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l5 level1 lfo1;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Cold basic blocks are split out using jumps.<o:p></o:p></span></li><li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l5 level1 lfo1;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">No additional instructions are added to the function for setup/teardown.<o:p></o:p></span></li><li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l5 level1 lfo1;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Runs as the last step before emitting assembly, no analysis/optimizations are hindered.<o:p></o:p></span></li></ul>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">Exceptions</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">All eh pads are grouped together regardless of their coldness and are part of the original function. There are outstanding issues with splitting
eh pads if they reside in separate sections in the binary. This remains as part of future work.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">DebugInfo and CFI </span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:'Arial',sans-serif;color:black">Debug information and CFI directives are updated and kept consistent by the underlying basic block sections framework. Support was added in the
following patches:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<ul style="margin-top:0in" type="disc">
<li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l4 level1 lfo2;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">DebugInfo (<a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__reviews.llvm.org_D78851&d=DwMFaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=KfYo542rDdZQGClmgz-RBw&m=-cUmMKRcOXZHF-PpVxO_Dfg2mkIgP4L_QomIwDizeEE&s=A8or2dRzqfxiaH66OGKD5iVw4mOcqafXaCHhJpRLdYs&e=">https://reviews.llvm.org/D78851</a>)<o:p></o:p></span></li><li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l4 level1 lfo2;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">CFI (<a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__reviews.llvm.org_D79978&d=DwMFaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=KfYo542rDdZQGClmgz-RBw&m=-cUmMKRcOXZHF-PpVxO_Dfg2mkIgP4L_QomIwDizeEE&s=9AuVMR_KTlutU3emsXEzWynp-9eD1yE_42wF7O3DP6o&e=">https://reviews.llvm.org/D79978</a>).<o:p></o:p></span></li></ul>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><b><span style="font-family:"Arial",sans-serif;color:black"><br>
<br>
</span></b><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><b><span style="font-family:"Arial",sans-serif;color:black">Distinction between Machine Function Splitter and Propeller</span></b><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="mso-margin-top-alt:0in;margin-right:0in;margin-bottom:10.0pt;margin-left:0in;text-align:justify">
<span style="font-family:"Arial",sans-serif;color:#202124"><br>
Full Propeller optimizations include function splitting and layout optimizations, however it requires an additional round of profiling using perf on top of the peak (FDO/CSFDO + ThinLTO) binary. In this work we experiment with applying function splitting using
the instrumented profile in the build instead of adding an additional round of profiling.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="mso-margin-top-alt:0in;margin-right:0in;margin-bottom:10.0pt;margin-left:0in;text-align:justify">
<span style="font-size:12.0pt"><a href="https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.llvm.org_pipermail_llvm-2Ddev_2019-2DSeptember_135393.html&d=DwMFaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=KfYo542rDdZQGClmgz-RBw&m=-cUmMKRcOXZHF-PpVxO_Dfg2mkIgP4L_QomIwDizeEE&s=y0u_TamS9xnHRAQVD1cDCxl-AzE-QbTNmnYU73oxxFE&e="><span style="font-size:11.0pt;font-family:"Arial",sans-serif">Link
to Propeller RFC</span></a><o:p></o:p></span></p>
<p style="mso-margin-top-alt:0in;margin-right:0in;margin-bottom:10.0pt;margin-left:0in;text-align:justify">
<b><span style="font-family:"Arial",sans-serif;color:black"><br>
<br>
</span></b><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="mso-margin-top-alt:0in;margin-right:0in;margin-bottom:10.0pt;margin-left:0in;text-align:justify">
<b><span style="font-family:"Arial",sans-serif;color:black">Split Binary Characteristics</span></b><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="mso-margin-top-alt:0in;margin-right:0in;margin-bottom:10.0pt;margin-left:0in;text-align:justify">
<span style="font-family:"Arial",sans-serif;color:black">Binaries produced by the compiler with function splitting enabled contain additional symbols. A function which has been split into a hot and cold part is non-contiguous. The symbol table entry for the
hot part retains the symbol name of the original function with type FUNC. The symbol for the cold part contains a “.cold” suffix attached to the original symbol name; the type is not set for this symbol. Using a suffix has been the norm for such optimizations,
e.g. -hot-cold-split in LLVM and the prior GCC implementation detailed earlier. We expect standardized tooling to handle split functions appropriately, e.g. demangling works as expected --</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" style="border-collapse:collapse">
<tbody>
<tr>
<td valign="top" style="border:solid #E0E0E0 1.0pt;background:#FAFAFA;padding:5.0pt 5.0pt 5.0pt 5.0pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:Consolas;color:black">$ c</span><span style="font-size:10.0pt;font-family:Consolas;color:#616161">++</span><span style="font-size:10.0pt;font-family:Consolas;color:black">filt
_Z3foov</span><span style="font-size:10.0pt;font-family:Consolas;color:#616161">.</span><span style="font-size:10.0pt;font-family:Consolas;color:black">cold</span><o:p></o:p></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:Consolas;color:black">foo</span><span style="font-size:10.0pt;font-family:Consolas;color:#616161">()</span><span style="font-size:10.0pt;font-family:Consolas;color:black">
</span><span style="font-size:10.0pt;font-family:Consolas;color:#616161">[</span><span style="font-size:10.0pt;font-family:Consolas;color:black">clone
</span><span style="font-size:10.0pt;font-family:Consolas;color:#616161">.</span><span style="font-size:10.0pt;font-family:Consolas;color:black">cold</span><span style="font-size:10.0pt;font-family:Consolas;color:#616161">]</span><o:p></o:p></p>
</td>
</tr>
</tbody>
</table>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<h2 style="mso-margin-top-alt:.25in;margin-right:0in;margin-bottom:6.0pt;margin-left:0in;text-align:justify">
<span style="font-size:16.0pt;font-family:"Arial",sans-serif;color:black;font-weight:normal">Contrast with HotColdSplit (HCS)</span><o:p></o:p></h2>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">Function splitting in the middle-end in LLVM employs extraction of cold single-entry-single-exit (SESE) regions into separate functions.
In general, the pass has been found to be impactful in reducing code size by deduplication of cold regions; however our experiments show it does not improve performance of large workloads. </span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">The key differences are:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><b><span style="font-family:"Arial",sans-serif;color:black">Extraction methodology and tradeoffs</span></b><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">HCS extracts cold code from SESE regions using a function call. This may incur a spill and fill of caller registers along with additional
setup and teardown if live values modified in the cold region need to be communicated back to the original function. This has a couple of implications:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<ol style="margin-top:0in" start="1" type="1">
<li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l0 level1 lfo3;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">The “residue” of each extracted region is non-trivial, so there is a tradeoff in how much code needs to be cold before it is profitable to extract. Thus the cost of mischaracterization is high.<o:p></o:p></span></li><li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l0 level1 lfo3;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Since each SESE region is extracted separately the net reduction in code size of the original function is less.<br>
<br>
<br>
<o:p></o:p></span></li></ol>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">In contrast, the machine function splitter extracts cold code into a separate section. Control is transferred to cold code via jumps. More
often than not these jumps may already exist as part of the original layout thus incurring no additional cost. No additional instructions are inserted to accommodate splitting. Finally, no additional setup/teardown is necessary for live values modified in
cold regions.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><b><span style="font-family:"Arial",sans-serif;color:black">Pass timing and interaction with other optimizations</span></b><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">The HCS pass is run on the IR in the optimizer. This allows it to be target agnostic and allows later stages to merge identical code if necessary.
However, there are some drawbacks to this approach. In particular,</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<ol style="margin-top:0in" start="1" type="1">
<li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l3 level1 lfo4;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Splitting early may miss opportunities introduced by later passes such as library call inlining and CFG simplification resulting from a combination of optimizations. Furthermore, this may not play well with optimization
passes such as MachineOutliner.<o:p></o:p></span></li><li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l3 level1 lfo4;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Synergistic optimizations are harder to reason about due to the pass timing. For example, inlining can be more aggressive if any cold code introduced is trimmed.<br>
<br>
<br>
<o:p></o:p></span></li></ol>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">In contrast, the machine function splitter runs as the last step in codegen. This ensures that the opportunity for splitting is maximised
without hindering existing analyses and synergistic decisions can be made in earlier optimization passes. We rely on accurate profile count propagation across optimizations to maximise opportunities. This works particularly well for instrumented profiles while
improving the pass for sampled profiles is ongoing work.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">We have provided a contrived example in the Appendix which demonstrates the code generated for both approaches. The key differences are highlighted
inline.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<h2 style="mso-margin-top-alt:.25in;margin-right:0in;margin-bottom:6.0pt;margin-left:0in;text-align:justify">
<span style="font-size:16.0pt;font-family:"Arial",sans-serif;color:black;font-weight:normal">Evaluation</span><o:p></o:p></h2>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">In this section, we present an in-depth evaluation of the impact on clang bootstrap and summary results for two Google internal workloads,
Search1 and Search2, as well as overall results on the SPECInt 2017 benchmarks. All experiments are conducted on Intel Skylake based systems unless otherwise noted. Profile guided optimizations using instrumented profiles are enabled for all builds.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><b><span style="font-family:"Arial",sans-serif;color:black">clang-bootstrap</span></b><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">We pick 500 compiler invocations from a bootstrap build of clang and then compare the performance of a PGO+ThinLTO optimized version with
that of a PGO+ThinLTO+Split compiler. For the latter, the final optimized build includes the machine function splitter.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">Results: </span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">We observe a mean 2.33% improvement in end to end runtime. The improvements in runtime are driven by reduction in icache and TLB miss rates.
The table below summarizes our experiment; each data point is averaged over multiple iterations. The observed variation for each metric is &lt; 1%.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" style="border-collapse:collapse">
<tbody>
<tr style="height:26.25pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:26.25pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">Event</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-left:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:26.25pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">Split (MPKI)</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-left:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:26.25pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">Baseline (MPKI)</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-left:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:26.25pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">% Reduction</span><o:p></o:p></p>
</td>
</tr>
<tr style="height:15.75pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-top:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">itlb_miss</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">0.87</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">1.28</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">31.70</span><o:p></o:p></p>
</td>
</tr>
<tr style="height:15.75pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-top:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">stlb_miss</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">0.08</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">0.12</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">32.51</span><o:p></o:p></p>
</td>
</tr>
<tr style="height:15.75pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-top:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">l1i_miss</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">5.98</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">6.61</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">9.56</span><o:p></o:p></p>
</td>
</tr>
<tr style="height:15.75pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-top:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">l2_miss</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">0.27</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">0.34</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">20.02</span><o:p></o:p></p>
</td>
</tr>
</tbody>
</table>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">In this experiment, the function splitting pass moved cold code from ~30K functions in .text and .text.hot. We present a comparison of the
binary contents using </span><span style="font-size:12.0pt"><a href="https://github.com/google/bloaty"><span style="font-size:11.0pt;font-family:"Arial",sans-serif">bloaty</span></a>:<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> FILE SIZE VM SIZE </span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> -------------- -------------- </span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> +23% +8.26Mi +23% +8.26Mi .text.unlikely</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> +6.5% +761Ki [ = ] 0 .strtab</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> +4.8% +247Ki +4.8% +247Ki .eh_frame</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> +6.1% +193Ki [ = ] 0 .symtab</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> +8.5% +63.1Ki +8.5% +63.1Ki .eh_frame_hdr</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> +0.3% +31.3Ki +0.3% +31.3Ki .rodata</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> +0.4% +3 [ = ] 0 [Unmapped]</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> -0.3% -8 -0.3% -8 .init_array</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> [ = ] 0 -33.3% -8 [LOAD #4 [RW]]</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> [ = ] 0 -0.2% -416 .bss</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> -57.1% -4.04Mi -57.1% -4.04Mi .text.hot</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> -48.4% -4.13Mi -48.4% -4.13Mi .text</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Courier New";color:black"> +1.6% +1.35Mi +0.6% +430Ki TOTAL</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">We see that 48% and 57% of code in .text and .text.hot respectively was moved to the .text.unlikely section. We also note a small increase
in overall binary size due to the following reasons:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<ul style="margin-top:0in" type="disc">
<li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l1 level1 lfo5;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Some additional jump instructions may be inserted.<o:p></o:p></span></li><li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l1 level1 lfo5;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Small increase in associated metadata, e.g. debug information.<o:p></o:p></span></li><li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l1 level1 lfo5;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Additional symbols of type foo.cold for cold parts.<o:p></o:p></span></li><li style="color:black;margin-top:0in;margin-bottom:0in;margin-bottom:.0001pt;text-align:justify;mso-list:l1 level1 lfo5;vertical-align:baseline;font-variant-numeric:normal;font-variant-east-asian:normal">
<span style="font-family:"Arial",sans-serif">Alignment requirements for both original and split function parts.<o:p></o:p></span></li></ul>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><i><span style="font-family:"Arial",sans-serif;color:black">Comparison with HotColdSplit</span></i><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">For the clang-bootstrap benchmark we also compared the performance of the hot-cold-split pass with split-machine-functions. We summarize
the results for performance and the characteristics of the binary built by each pass in the table below. Each metric is presented as change vs the baseline, an FDO optimized build of clang.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<table class="MsoNormalTable" border="0" cellspacing="0" cellpadding="0" style="border-collapse:collapse">
<tbody>
<tr style="height:15.75pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
</td>
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-left:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">Hot Cold Split</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-left:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">Machine Function Splitter</span><o:p></o:p></p>
</td>
</tr>
<tr style="height:15.75pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-top:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">Performance</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">1.10%</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">2.65%</span><o:p></o:p></p>
</td>
</tr>
<tr style="height:15.75pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-top:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">.text size</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">-41.5% -2.89Mi</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">-49.2% -3.43Mi</span><o:p></o:p></p>
</td>
</tr>
<tr style="height:15.75pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-top:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">.text.hot size</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">-46.9% -2.52Mi</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">-57.1% -3.07Mi</span><o:p></o:p></p>
</td>
</tr>
<tr style="height:15.75pt">
<td valign="bottom" style="border:solid #CCCCCC 1.0pt;border-top:none;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">Full binary size</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">9.6% +7.56Mi</span><o:p></o:p></p>
</td>
<td valign="bottom" style="border-top:none;border-left:none;border-bottom:solid #CCCCCC 1.0pt;border-right:solid #CCCCCC 1.0pt;padding:2.0pt 2.0pt 2.0pt 2.0pt;height:15.75pt;overflow:hidden">
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Arial",sans-serif;color:black">1.7% +1.37Mi</span><o:p></o:p></p>
</td>
</tr>
</tbody>
</table>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">Note that the increase in overall binary size for HCS is due to the increase in .eh_frame (+61% +3.03Mi). HCS extracts each cold
SESE region as a separate function whereas the machine function splitter extracts the cold code as a single region thus incurring a constant overhead per function.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><b><span style="font-family:"Arial",sans-serif;color:black">Google workloads</span></b><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">We evaluated the impact of function splitting on a couple of search workloads, Search1 and Search2. A key difference with respect to the
clang experiment above is the use of huge pages for code. Overall, we find that on Intel Skylake the key benefit is from reduction of iTLB misses whereas on AMD the key benefit is from the reduction of icache misses. This is due to the fewer iTLB entries available
for hugepages on Intel architectures. We find that overall throughput for Search1 and Search2 improves by 0.8% to 1.2%, a significant improvement on these benchmarks. The workloads are built with FDO and CSFDO respectively. On Intel Skylake, iTLB misses
reduce by 16% to 35%, sTLB misses reduce by 62% to 67%. On AMD, L1 icache misses improve by 1.2% to 2.6% whereas L2 instruction misses improve by 4.8% to 5.1%. </span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><i><span style="font-family:"Arial",sans-serif;color:black">Comparison with HotColdSplit</span></i><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">An evaluation of the hot-cold-split pass did not yield performance improvements on Google workloads.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><b><span style="font-family:"Arial",sans-serif;color:black">SPECInt 2017</span></b><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">We evaluated the impact of the machine function splitter on SPECInt 2017 using the int rate metrics. Overall, we found a 1.6% geomean intrate
improvement for the benchmarks where performance improved (500.perlbench_r, 502.gcc_r, 505.mcf_r, 520.omnetpp_r). For the benchmarks that didn’t improve performance, the average degradation was 0.6% (523.xalancbmk_r, 525.x264_r, 531.deepsjeng_r, 541.leela_r). </span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">We note that the instruction footprint of SPEC workloads is smaller than most modern workloads and our work is primarily focused on reducing
the footprint to improve performance. These experiments were performed on Intel Haswell machines.</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<h2 style="mso-margin-top-alt:.25in;margin-right:0in;margin-bottom:6.0pt;margin-left:0in;text-align:justify">
<span style="font-size:16.0pt;font-family:"Arial",sans-serif;color:black;font-weight:normal">Appendix</span><o:p></o:p></h2>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">Example to illustrate hot-cold-split and split-machine-functions</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">Input IR</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">```</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">@i = external global i32, align 4</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">define i32 @foo(i32 %0, i32 %1) nounwind !prof !1 {</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> %3 = icmp eq i32 %0, 0</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> br i1 %3, label %6, label %4, !prof !2</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">4: ; preds = %2</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> %5 = call i32 @L1()</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> br label %9</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">6: ; preds = %2</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> %7 = call i32 @R1()</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> %8 = add nsw i32 %1, 1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> br label %9</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">9: ; preds = %6, %4</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> %10 = phi i32 [ %1, %4 ], [ %8, %6 ]</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> %11 = load i32, i32* @i, align 4</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> %12 = add nsw i32 %10, %11</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> store i32 %12, i32* @i, align 4</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> ret i32 %12</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">}</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">declare i32 @L1()</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">declare i32 @R1() cold nounwind</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">!1 = !{!"function_entry_count", i64 7}</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">!2 = !{!"branch_weights", i32 0, i32 7}</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">```</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:'Arial',sans-serif;color:black">Code generated by Machine Function Splitter:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:'Arial',sans-serif;color:black">$ llc &lt; example.ll -mtriple=x86_64-unknown-linux-gnu -split-machine-functions</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">```</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .text</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:'Courier New';color:black"> .file "&lt;stdin&gt;"</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .globl foo # -- Begin function foo</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .p2align 4, 0x90</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .type foo,@function</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">foo: # @foo</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"># %bb.0:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> pushq %rbx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> movl %esi, %ebx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> testl %edi, %edi</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> <span style="background:yellow">je foo.cold # Jump to cold code</span></span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"># %bb.1:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> callq L1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">.LBB0_2:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> addl i(%rip), %ebx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> movl %ebx, i(%rip)</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> movl %ebx, %eax</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> popq %rbx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> retq</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .section .text.unlikely.foo,"ax",@progbits</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">foo.cold:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> callq R1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> <span style="background:yellow">incl %ebx # Directly increment value</span></span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> jmp .LBB0_2</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">.LBB_END0_3:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .size foo.cold, .LBB_END0_3-foo.cold</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .text</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">.Lfunc_end0:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .size foo, .Lfunc_end0-foo</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> # -- End function</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .section ".note.GNU-stack","",@progbits</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">```</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:'Arial',sans-serif;color:black">Code generated by Hot Cold Split:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">$ clang -c -O2 -S -mllvm --hot-cold-split -mllvm --hotcoldsplit-threshold=0 -x ir example.ll</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">```</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .text</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .file "example.ll"</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .globl foo # -- Begin function foo</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .p2align 4, 0x90</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .type foo,@function</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">foo: # @foo</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"># %bb.0:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> pushq %rbx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> subq $16, %rsp</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> movl %esi, %ebx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> testl %edi, %edi</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> jne .LBB0_1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black;background:yellow"># %bb.2: # Residue block in original function</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> leaq 12(%rsp), %rsi</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> <span style="background:yellow">movl %ebx, %edi # Pass param to increment</span></span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> <span style="background:yellow">callq foo.cold.1 # Call to cold code</span></span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> <span style="background:yellow">movl 12(%rsp), %ebx # Fill incremented value from stack</span></span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">.LBB0_3:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> addl i(%rip), %ebx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> movl %ebx, i(%rip)</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> movl %ebx, %eax</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> addq $16, %rsp</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> popq %rbx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> retq</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">.LBB0_1:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> callq L1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> jmp .LBB0_3</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">.Lfunc_end0:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .size foo, .Lfunc_end0-foo</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> # -- End function</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .p2align 4, 0x90 # -- Begin function foo.cold.1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .type foo.cold.1,@function</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">foo.cold.1: # @foo.cold.1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"># %bb.0: # %newFuncRoot</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> pushq %rbp</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> pushq %rbx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> pushq %rax</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> movq %rsi, %rbx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> movl %edi, %ebp</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> callq R1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> incl %ebp</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> movl %ebp, (%rbx)</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> addq $8, %rsp</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> popq %rbx</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> popq %rbp</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> retq</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black">.Lfunc_end1:</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .size foo.cold.1, .Lfunc_end1-foo.cold.1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> # -- End function</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .cg_profile foo, L1, 0</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .cg_profile foo, foo.cold.1, 7</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .section ".note.GNU-stack","",@progbits</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .addrsig</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-size:10.0pt;font-family:"Courier New";color:black"> .addrsig_sym foo.cold.1</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black">```</span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
</div>
<p class="MsoNormal"><span style="font-size:12.0pt">Thanks,<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">Snehasish Kumar<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt">Software Engineer, Google<o:p></o:p></span></p>
<p style="margin:0in;margin-bottom:.0001pt;text-align:justify"><span style="font-family:"Arial",sans-serif;color:black"> </span><span style="font-size:12.0pt"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:12.0pt"><o:p> </o:p></span></p>
</div>
</div>
</div>
</body>
</html>