<div dir="ltr">We see performance regressions after this patch. A number of benchmarks regressed for more than 10%. One example is the flops-6.c from the LLVM test-suite. An isolated test based on that benchmark:<div><div><br></div><div>$ cat flops-6.c</div><div>extern int printf (const char *__restrict __format, ...);<br>double T[36];<br>double sa,sb,sc,sd,one,two;<br>double four,piref;<br>double scale;<br>double A1 = -0.1666666666671334;<br>double A2 = 0.833333333809067E-2;<br>double A3 = 0.198412715551283E-3;<br>double A4 = 0.27557589750762E-5;<br>double A5 = 0.2507059876207E-7;<br>double A6 = 0.164105986683E-9;<br>double B1 = -0.4999999999982;<br>double B2 = 0.4166666664651E-1;<br>double B3 = -0.1388888805755E-2;<br>double B4 = 0.24801428034E-4;<br>double B5 = -0.2754213324E-6;<br>double B6 = 0.20189405E-8;<br>int main()<br>{<br>   double s,u,v,w,x;<br>   long loops;<br>   register long i, m, n;<br>   printf("\n");<br>   printf("   FLOPS C Program (Double Precision), V2.0 18 Dec 1992\n\n");<br>   loops = 15625;<br>   piref = 3.14159265358979324;<br>   one = 1.0;<br>   two = 2.0;<br>   four = 4.0;<br>   scale = one;<br>   printf("   Module     Error        RunTime      MFLOPS\n");<br>   printf("                            (usec)\n");<br>   m = loops*10000;<br>   x = piref / ( four * (double)m );<br>   s = 0.0;<br>   v = 0.0;<br>   for( i = 1 ; i <= m-1 ; i++ )<br>   {<br>   u = (double)i * x;<br>   w = u * u;<br>   v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);<br>   s = s + v*(w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one);<br>   }<br>   u = piref / four;<br>   w = u * u;<br>   sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);<br>   sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;<br>   sa = sa * sb;<br>   sa = x * ( sa + two * s ) / two;<br>   sb = 0.25;<br>   sc = sa - sb;<br>   printf("     6   %13.4lf  %10.4lf  %10.4lf\n",<br>          sc* 1e-30,<br>          0* 1e-30 ,<br>          0* 1e-30);<br>   return 0;<br>}<br></div><div>$ clang-base -O3 
-maes -m64 -mcx16 -msse4.2 -mpclmul '-mprefer-vector-width=128' flops-6.c -o flops-6-base<br></div><div>$ clang-new -O3 -maes -m64 -mcx16 -msse4.2 -mpclmul '-mprefer-vector-width=128' flops-6.c -o flops-6-new</div><div>$ for i in $(seq 5) ; do time ./flops-6-base ; done<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.705s<br>user    0m0.700s<br>sys     0m0.004s<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.706s<br>user    0m0.704s<br>sys     0m0.001s<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.706s<br>user    0m0.705s<br>sys     0m0.001s<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.706s<br>user    0m0.704s<br>sys     0m0.001s<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.707s<br>user    0m0.705s<br>sys     0m0.001s<br>$ for i in $(seq 5) ; do time ./flops-6-new ; done<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.899s<br>user    0m0.898s<br>sys     0m0.000s<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.899s<br>user    0m0.898s<br>sys     0m0.000s<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.900s<br>user    0m0.899s<br>sys     0m0.000s<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.899s<br>user    0m0.898s<br>sys     0m0.000s<br>     6          0.0000      0.0000      0.0000<br><br>real    0m0.899s<br>user    0m0.898s<br>sys     0m0.000s<br></div></div><div><br></div><div>Can you take a look at this and maybe revert in the meantime?</div><div><br></div><div>Thanks!</div><div><br></div><div>-- Alex</div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Mon, May 10, 2021 at 4:10 PM Alexey Bataev via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><br>

Author: Alexey Bataev<br>

Date: 2021-05-10T07:08:07-07:00<br>

New Revision: 30463bc3f1839e8a238be4c137e2356f3cca2771<br>

<br>

URL: <a href="https://github.com/llvm/llvm-project/commit/30463bc3f1839e8a238be4c137e2356f3cca2771" rel="noreferrer" target="_blank">https://github.com/llvm/llvm-project/commit/30463bc3f1839e8a238be4c137e2356f3cca2771</a><br>

DIFF: <a href="https://github.com/llvm/llvm-project/commit/30463bc3f1839e8a238be4c137e2356f3cca2771.diff" rel="noreferrer" target="_blank">https://github.com/llvm/llvm-project/commit/30463bc3f1839e8a238be4c137e2356f3cca2771.diff</a><br>

<br>

LOG: [SLP]Do not count perfect diamond matches for gathers several times.<br>

<br>

Need to remove the old code for avoiding double counting of the gather<br>

nodes with perfect diamond matches within the tree after we started<br>

detecting perfect/shuffled matching in the previous patch D100495. We<br>

may skip the cost for such nodes completely.<br>

<br>

Differential Revision: <a href="https://reviews.llvm.org/D102023" rel="noreferrer" target="_blank">https://reviews.llvm.org/D102023</a><br>

<br>

Added: <br>

<br>

<br>

Modified: <br>

    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>

    llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll<br>

<br>

Removed: <br>

<br>

<br>

<br>

################################################################################<br>

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>

index 22e090fd1d7c..e656b189c779 100644<br>

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>

+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>

@@ -4233,27 +4233,6 @@ InstructionCost BoUpSLP::getTreeCost() {<br>

   for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {<br>

     TreeEntry &TE = *VectorizableTree[I].get();<br>

<br>

-    // We create duplicate tree entries for gather sequences that have multiple<br>

-    // uses. However, we should not compute the cost of duplicate sequences.<br>

-    // For example, if we have a build vector (i.e., insertelement sequence)<br>

-    // that is used by more than one vector instruction, we only need to<br>

-    // compute the cost of the insertelement instructions once. The redundant<br>

-    // instructions will be eliminated by CSE.<br>

-    //<br>

-    // We should consider not creating duplicate tree entries for gather<br>

-    // sequences, and instead add additional edges to the tree representing<br>

-    // their uses. Since such an approach results in fewer total entries,<br>

-    // existing heuristics based on tree size may yield different results.<br>

-    //<br>

-    if (TE.State == TreeEntry::NeedToGather &&<br>

-        std::any_of(std::next(VectorizableTree.begin(), I + 1),<br>

-                    VectorizableTree.end(),<br>

-                    [TE](const std::unique_ptr<TreeEntry> &EntryPtr) {<br>

-                      return EntryPtr->State == TreeEntry::NeedToGather &&<br>

-                             EntryPtr->isSame(TE.Scalars);<br>

-                    }))<br>

-      continue;<br>

-<br>

     InstructionCost C = getEntryCost(&TE);<br>

     Cost += C;<br>

     LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C<br>

<br>

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll<br>

index 31c63d31f4df..57db62ace206 100644<br>

--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll<br>

+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll<br>

@@ -10,7 +10,7 @@ target triple = "aarch64--linux-gnu"<br>

 ; REMARK-LABEL: Function: gather_multiple_use<br>

 ; REMARK:       Args:<br>

 ; REMARK-NEXT:    - String: 'Vectorized horizontal reduction with cost '<br>

-; REMARK-NEXT:    - Cost: '-16'<br>

+; REMARK-NEXT:    - Cost: '-7'<br>

 ;<br>

 ; REMARK-NOT: Function: gather_load<br>

<br>

<br>

<br>

<br>

_______________________________________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>

<a href="https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>

</blockquote></div>