[llvm-bugs] [Bug 30556] New: [ppc] redundant memory store and load for vector::push_back

via llvm-bugs llvm-bugs at lists.llvm.org
Wed Sep 28 15:07:41 PDT 2016


https://llvm.org/bugs/show_bug.cgi?id=30556

            Bug ID: 30556
           Summary: [ppc] redundant memory store and load for
                    vector::push_back
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: normal
          Priority: P
         Component: Backend: PowerPC
          Assignee: unassignedbugs at nondot.org
          Reporter: carrot at google.com
                CC: llvm-bugs at lists.llvm.org
    Classification: Unclassified

When compiling the following source code with the options

-m64 -O2 -mvsx -mcpu=power8 -std=c++11 

#include <cstdint>
#include <vector>
#include <algorithm>

using std::vector;

typedef int64_t int64;
typedef uint64_t uint64;

// Plain record that B::Add fills in and appends to B::f4.
// It holds one int64 and two ints; the assembly listing in this report
// advances the vector's end pointer by 16 bytes per appended element.
struct O { 
  // Receives the `d` argument of B::Add.
  int64 f1; 
  // Receives f5.size() in B::Add (narrowed to int).
  int f2; 
  // Receives p.size() in B::Add (narrowed to int).
  int f3; 
};

// Reproducer class: Add() builds a local O on the stack and appends it to f4.
// The statements marked with `// *` are the ones whose generated code is
// discussed in this report.
class B { 
 public:
  // Appends one O record to f4 and always returns true.
  //   d - value stored in the record's f1 field.
  //   p - taken by value, so the argument is copied on every call; only
  //       its size() is read.
  bool Add(int64 d, vector<uint64> p) {
    // size() returns an unsigned size type; it is narrowed to int here.
    const int s = f5.size();
    O o;
    o.f1 = d;               // *
    o.f2 = s;               // *
    o.f3 = p.size();        // *
    f4.push_back(o);        // *
    return true;
  }

  // Returns the number of records currently in f4, narrowed to int.
  int size() {
    return f4.size();
  }

  // Records appended by Add().
  vector<O> f4; 
  // Only its size() is read (in Add); nothing in the code shown modifies it.
  vector<uint64> f5; 
};

// Declared only; no definition is given in this reproducer.
vector<int64> foo();

// Driver for the reproducer: obtains a vector from foo(), walks it from the
// last element to the first, and calls B::Add once per element, passing the
// element and *y. Returns b.size() afterwards.
int bar(vector<uint64>* y) {
  auto h = foo();
  B b;
  // The lambda captures b and y by reference; *y is copied into Add's
  // by-value parameter on each call.
  std::for_each(h.rbegin(), h.rend(), [&](const int64& hit) {
    b.Add(hit, *y);
  }); 

  return b.size();
}

LLVM generates the following instructions for the statements marked with *

         ...
.LBB0_8:                                # %_ZNSt6vectorImSaImEEC2ERKS1_.exit.i.i
                                        #   in Loop: Header=BB0_2 Depth=1
        ld 3, 128(31)
        ld 4, 120(31)
        ld 5, 112(31)
        rldicl 6, 26, 61, 3
        std 23, 168(31)              // o.f1 = d;
        stw 6, 180(31)               // o.f3 = p.size();
        sub      4, 3, 4
        ld 3, 104(31)
        rldicl 4, 4, 61, 3
        stw 4, 176(31)               // o.f2 = s; 
        cmpld    3, 5                // vector full?
        beq      0, .LBB0_10
# BB#9:                                 # %if.then.i.i.i.i1
                                        #   in Loop: Header=BB0_2 Depth=1
        lxvd2x 0, 0, 28              // copy object o
        stxvd2x 0, 0, 3              // to vector f4
        ori 2, 2, 0
        ld 3, 104(31)
        addi 3, 3, 16
        std 3, 104(31)
        b .LBB0_11
        ...

Note that the several store instructions construct the local object o, and
lxvd2x then immediately loads it back into a register. This is very slow
because of the store-forwarding penalty. At the same time, the values of o are
still in registers r23, r6, and r4, so we could store these registers directly
to the vector.
Or even better, the local object o does not need to be constructed in BB8 at
all; it is only required on the slow path, before calling
_ZNSt6vectorI1OSaIS0_EE19_M_emplace_back_auxIJRKS0_EEEvDpOT_.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20160928/a35f2913/attachment-0001.html>


More information about the llvm-bugs mailing list