<html>
    <head>
      <base href="https://llvm.org/bugs/" />
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - extra memory load in loop"
   href="https://llvm.org/bugs/show_bug.cgi?id=27740">27740</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>extra memory load in loop
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Scalar Optimizations
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>carrot@google.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Compile the following code with trunk LLVM:

$ ~/llvm/obj2/bin/clang --target=powerpc64le-grtev4-linux-gnu -O2 \
    -fno-unroll-loops -c t17.c -save-temps

struct s{
  int f0;
  int f1;
  float f2;
  float f3;
  float f4;
};

static void bar(struct s *p1, struct s *p2)
{
  if (p1->f0 &amp;&amp; p1->f3 &lt; p2->f3)
     p1->f3 = p2->f3;
  else
     p1->f3 = p2->f3;    // p1->f4 = p2->f4;
}

void foo(struct s *p1, struct s *p2, int size)
{
  for (int i=0; i&lt;size; i++)
  {
    p1->f1 += p2->f1;
    p1->f2 -= p2->f2;
    bar(p1, p2);
    p2++;
  }
}

LLVM generates the following code for the loop body:

.LBB0_2:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        lwzu 9, 20(4)          // A1
        lxsspx 1, 3, 7         //    B1
        lwz 10, 4(3)           // A2
        lxsspx 0, 4, 6         //    B2
        add 9, 10, 9           // A3
        stw 9, 4(3)            // A4
        xssubsp 0, 1, 0        //    B3
        stxsspx 0, 3, 7        //    B4
        bc 12, 2, .LBB0_5
# BB#3:                                 # %land.lhs.true.i
                                        #   in Loop: Header=BB0_2 Depth=1
        lxsspx 0, 4, 7
        lxsspx 1, 3, 8
        fcmpu 1, 1, 0
        bge 1, .LBB0_6
# BB#4:                                 # %if.then.i
                                        #   in Loop: Header=BB0_2 Depth=1
        stxsspx 0, 3, 8
        b .LBB0_8
        .p2align        4
.LBB0_5:                                # %entry.if.else_crit_edge.i
                                        #   in Loop: Header=BB0_2 Depth=1
        lwz 9, 8(4)
        b .LBB0_7
        .p2align        4
.LBB0_6:                                # %land.lhs.true.if.else_crit_edge.i
                                        #   in Loop: Header=BB0_2 Depth=1
        xscvdpspn 0, 0
        xxsldwi 0, 0, 0, 3
        mfvsrwz 9, 0
.LBB0_7:                                # %if.else.i
                                        #   in Loop: Header=BB0_2 Depth=1
        stw 9, 12(3)
.LBB0_8:                                # %bar.exit
                                        #   in Loop: Header=BB0_2 Depth=1
        addi 5, 5, -1
        bdnz .LBB0_2


The statement p1->f1 += p2->f1 is translated to A1, A2, A3, A4, and the
statement p1->f2 -= p2->f2 is translated to B1, B2, B3, B4. Inside the loop,
all stores go through pointer p1 only, so even though p1 may alias p2, it is
safe to keep copies of p1->f1 and p1->f2 in registers. A2 and B1 can therefore
be removed from the loop.

In the source code, if I replace one of the "p1->f3 = p2->f3" statements with
"p1->f4 = p2->f4", then LLVM generates the expected optimized code.</pre>
        </div>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>