<html>
    <head>
      <base href="https://llvm.org/bugs/" />
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - [ppc] slow instructions caused by unnecessary st/ld forwarding"
   href="https://llvm.org/bugs/show_bug.cgi?id=30316">30316</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>[ppc] slow instructions caused by unnecessary st/ld forwarding
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: PowerPC
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>carrot@google.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>The source code is:

class C{
public:
  C();

  bool Next() {
    ++current;
    return !Done();
  }

  bool Done() const { return current >= num; }
  int val() const { return vals[current];}

  int* vals;
  int current;
  int num;
};

int bar() {
  C c;

  int s = 0;
  while (c.Next())
      s |= c.val();
  return s;
}


When compiled with options
--target=powerpc64le-grtev4-linux-gnu -m64 -O2 -mvsx -mcpu=power8

The while loop is translated to:

.LBB0_2:                                # %while.body
                                        # =>This Inner Loop Header: Depth=1
        extsw 7, 3
        addi 3, 4, 1              // ++current
        sldi 11, 7, 2
        lwzx 7, 6, 11
        stw 3, 112(31)            // store current
        ori 2, 2, 0
        ld 4, 112(31)             // load both current and num
        or 5, 7, 5
        rldicl 12, 4, 32, 32      // extract num
        cmpw 0, 4, 12
        blt      0, .LBB0_2

The interesting part is the code from c.Next(): after the variable current is
stored to memory, it is immediately loaded again, together with the variable
num, in a single load instruction. This triggers slow store forwarding, or
something even slower than store forwarding, because only part of the loaded
value is in the store queue; the other part is in the cache.

Actually, the value of current is already in register 3, so it doesn't need to
be loaded again.</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>