<html>
  <head>
    <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
  </head>
  <body bgcolor="#FFFFFF" text="#000000">
    <p><br>
    </p>
    <br>
    <div class="moz-cite-prefix">On 03/14/2017 06:09 PM, Zaks, Ayal
      wrote:<br>
    </div>
    <blockquote
cite="mid:F94EFB4222EBC045B95E403612539C824CAAD8DC@HASMSX106.ger.corp.intel.com"
      type="cite">
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
      <meta name="Generator" content="Microsoft Word 15 (filtered
        medium)">
      <!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style><![endif]-->
      <style><!--
/* Font Definitions */
@font-face
        {font-family:Helvetica;
        panose-1:2 11 6 4 2 2 2 2 2 4;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:Consolas;
        panose-1:2 11 6 9 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0cm;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
pre
        {mso-style-priority:99;
        mso-style-link:"HTML Preformatted Char";
        margin:0cm;
        margin-bottom:.0001pt;
        font-size:10.0pt;
        font-family:"Courier New";}
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0cm;
        margin-right:0cm;
        margin-bottom:0cm;
        margin-left:36.0pt;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
span.HTMLPreformattedChar
        {mso-style-name:"HTML Preformatted Char";
        mso-style-priority:99;
        mso-style-link:"HTML Preformatted";
        font-family:Consolas;}
span.EmailStyle20
        {mso-style-type:personal;
        font-family:"Calibri",sans-serif;
        color:#1F497D;}
span.EmailStyle21
        {mso-style-type:personal;
        font-family:"Calibri",sans-serif;
        color:#1F497D;}
span.EmailStyle22
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:#1F497D;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:612.0pt 792.0pt;
        margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:395780924;
        mso-list-type:hybrid;
        mso-list-template-ids:1076887890 67698705 67698713 67698715 67698703 67698713 67698715 67698703 67698713 67698715;}
@list l0:level1
        {mso-level-text:"%1\)";
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l0:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l0:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1
        {mso-list-id:711031633;
        mso-list-type:hybrid;
        mso-list-template-ids:-1063088596 67698705 67698713 67698715 67698703 67698713 67698715 67698703 67698713 67698715;}
@list l1:level1
        {mso-level-text:"%1\)";
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l1:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l1:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l1:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l1:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l1:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l1:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
ol
        {margin-bottom:0cm;}
ul
        {margin-bottom:0cm;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
      <div class="WordSection1">
        <p class="MsoNormal" style="margin-left:36.0pt"><a
            moz-do-not-send="true" name="_____replyseparator"></a><b><span
style="font-size:11.0pt;font-family:'Calibri',sans-serif">From:</span></b><span
style="font-size:11.0pt;font-family:'Calibri',sans-serif">
            Nema, Ashutosh [<a class="moz-txt-link-freetext" href="mailto:Ashutosh.Nema@amd.com">mailto:Ashutosh.Nema@amd.com</a>]
            <br>
            <br>
          </span></p>
        <p class="MsoNormal" style="margin-left:36.0pt">Summarizing the
          discussion on the implementation approaches.<o:p></o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt"><o:p> </o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt">We discussed two
          approaches: the first runs ‘InnerLoopVectorizer’ again on the
          epilog loop immediately after vectorizing the original loop,
          within the same vectorization pass; the second re-runs the
          vectorization pass and limits the vectorization factor of the
          epilog loop via metadata.<o:p></o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt"><o:p> </o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt">&lt;Approach-2&gt;<o:p></o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt">Challenges with
          re-running the vectorizer pass:<o:p></o:p></p>
        <p class="MsoListParagraph"
          style="margin-left:72.0pt;text-indent:-18.0pt;mso-list:l0
          level1 lfo2">
          <!--[if !supportLists]--><span style="mso-list:Ignore">1)<span
              style="font:7.0pt 'Times New Roman'">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            </span></span><!--[endif]--><span dir="LTR"></span>Reusing
          alias check result: <o:p></o:p></p>
        <p class="MsoListParagraph" style="margin-left:72.0pt">When the
          vectorizer pass runs again it sees the epilog loop as a new
          loop and may generate a new alias check; this new alias check
          may cancel out the gains of epilog vectorization.<o:p></o:p></p>
        <p class="MsoListParagraph" style="margin-left:72.0pt">We should
          use the already computed alias check result instead of
          recomputing it.<o:p></o:p></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D"><o:p> </o:p></span></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D">Right, can this challenge be addressed
            – can we record the “simple” fact that the epilog loop is
            vectorizable with trip count at-most VF*UF when reached from
            the vectorized loop? This is akin to passing similar
            information from the front-end when supplied by, e.g.,
            OpenMP pragmas, with the additional path-sensitive context
            attached.<o:p></o:p></span></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D"><o:p> </o:p></span></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D">Agreed, if each loop is handled
            independently, the multiple minimum-trip-count tests should
            be revisited to optimize for smallest trip-count first.<o:p></o:p></span></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D"><o:p> </o:p></span></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D">If the main loop was vectorized by VF
            and unrolled by UF&gt;1, it may be reasonable to vectorize
            the remainder loop with the same VF (w/o unrolling).</span></p>
      </div>
    </blockquote>
    <br>
    I agree; this is a good point. We need to consider VF*UF and scale
    back from there.<br>
    <br>
    <blockquote
cite="mid:F94EFB4222EBC045B95E403612539C824CAAD8DC@HASMSX106.ger.corp.intel.com"
      type="cite">
      <div class="WordSection1">
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D"> And then possibly vectorize the
            remainder of that with a smaller, say, VF/2. In addition,
            situations having small types and large vectors may result
            in large VF, again leaving room for possibly repeated epilog
            vectorizations with decreasing VF’s. At some point it would
            be good to try the alternative of a (final) masked vector
            epilog.</span></p>
      </div>
    </blockquote>
    <br>
    The follow-on to this is that we need to think carefully about how
    to do the cost modeling for this. We can't have so many checks along
    some paths that it defeats the benefit for some small loops with
    small trip counts.<br>
    <br>
     -Hal<br>
    <br>
    <blockquote
cite="mid:F94EFB4222EBC045B95E403612539C824CAAD8DC@HASMSX106.ger.corp.intel.com"
      type="cite">
      <div class="WordSection1">
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D"><o:p></o:p></span></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D"><o:p> </o:p></span></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D">Ayal.<o:p></o:p></span></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D"><o:p> </o:p></span></p>
        <p class="MsoListParagraph" style="margin-left:0cm"><span
            style="color:#1F497D"><o:p> </o:p></span></p>
        <p class="MsoListParagraph"
          style="margin-left:72.0pt;text-indent:-18.0pt;mso-list:l0
          level1 lfo2">
          <!--[if !supportLists]--><span style="mso-list:Ignore">2)<span
              style="font:7.0pt 'Times New Roman'">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            </span></span><!--[endif]--><span dir="LTR"></span>Rerun the
          vectorizer and hoist the new alias check:<o:p></o:p></p>
        <p class="MsoListParagraph" style="margin-left:72.0pt">It’s not
          possible to hoist the alias check, as it’s not fully redundant
          (not dominated by other checks): it is not executed on all
          paths.<o:p></o:p></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:'Calibri',sans-serif;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
        <p class="MsoNormal" style="margin-left:36.0pt"><img
            id="Picture_x0020_1"
            src="cid:part2.21C5D7E3.78E2671C@anl.gov"
            alt="Inline diagram attached to the original message"
            height="156" width="567"><o:p></o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt"><o:p> </o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt">NOTE: We cannot
          move the alias check earlier, as it’s expensive compared to
          other checks.<o:p></o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt"><o:p> </o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt">&lt;Approach-1&gt;<o:p></o:p></p>
        <p class="MsoListParagraph"
          style="margin-left:72.0pt;text-indent:-18.0pt;mso-list:l1
          level1 lfo4">
          <!--[if !supportLists]--><span style="mso-list:Ignore">1)<span
              style="font:7.0pt 'Times New Roman'">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            </span></span><!--[endif]--><span dir="LTR"></span>Current
          patch depends on the existing functionality of LoopVectorizer,
          it uses ‘InnerLoopVectorizer’ again to vectorize the epilog
          loop, as it happens in the same vectorization pass we have
          flexibility to reuse the already computed alias check result &amp;
          limit vectorization factor for the epilog loop.
          <o:p></o:p></p>
        <p class="MsoListParagraph"
          style="margin-left:72.0pt;text-indent:-18.0pt;mso-list:l1
          level1 lfo4">
          <!--[if !supportLists]--><span style="mso-list:Ignore">2)<span
              style="font:7.0pt 'Times New Roman'">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            </span></span><!--[endif]--><span dir="LTR"></span>It does
          not generate the blocks for new block layout explicitly,
          rather it depends on ‘InnerLoopVectorizer::createEmptyLoop’ to
          generate new block layout. The new block layout gets
          automatically generated by calling
          ‘InnerLoopVectorizer::vectorize’ again.<o:p></o:p></p>
        <p class="MsoListParagraph"
          style="margin-left:72.0pt;text-indent:-18.0pt;mso-list:l1
          level1 lfo4">
          <!--[if !supportLists]--><span style="mso-list:Ignore">3)<span
              style="font:7.0pt 'Times New Roman'">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            </span></span><!--[endif]--><span dir="LTR"></span>Block
          layout description with epilog loop vectorization is available
          at<o:p></o:p></p>
        <p class="MsoListParagraph" style="margin-left:72.0pt"><a
            moz-do-not-send="true"
href="https://reviews.llvm.org/file/data/fxg5vx3capyj257rrn5j/PHID-FILE-x6thnbf6ub55ep5yhalu/LayoutDescription.png">https://reviews.llvm.org/file/data/fxg5vx3capyj257rrn5j/PHID-FILE-x6thnbf6ub55ep5yhalu/LayoutDescription.png</a><o:p></o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt"><o:p> </o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt">Approach-1 looks
          feasible, please comment if any objections.<o:p></o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt"><o:p> </o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt">Regards,<o:p></o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt">Ashutosh<o:p></o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt"><o:p> </o:p></p>
        <p class="MsoNormal" style="margin-left:36.0pt"><span
style="font-size:11.0pt;font-family:'Calibri',sans-serif;color:#1F497D"><o:p>&nbsp;</o:p></span></p>
        ...</div>
    </blockquote>
    <br>
    <pre class="moz-signature" cols="72">-- 
Hal Finkel
Lead, Compiler Technology and Programming Languages
Leadership Computing Facility
Argonne National Laboratory</pre>
  </body>
</html>