<!doctype html>
<html lang="en">
    <head>
      <!-- Document metadata for the notification about bug 15462. -->
      <meta charset="utf-8">
      <title>Bug 15462 - Inefficient zeroing sequence on Sandybridge</title>
      <!-- Relative URLs in this document resolve against the bug tracker root. -->
      <base href="http://llvm.org/bugs/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - Inefficient zeroing sequence on Sandybridge"
   href="http://llvm.org/bugs/show_bug.cgi?id=15462">15462</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Inefficient zeroing sequence on Sandybridge
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: X86
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>nrotem@apple.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvmbugs@cs.uiuc.edu
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>The following IR sequence is combined during DAGCombine into a wide 256-bit
store. 

define void @foo(i64* %p) {
 store i64 0, i64* %p
 %q = getelementptr i64* %p, i64 1
 store i64 0, i64* %q
 %r = getelementptr i64* %p, i64 2
 store i64 0, i64* %r
 %s = getelementptr i64* %p, i64 3
 store i64 0, i64* %s
 ret void
}


But the lowering of Store operations does not know that this is a zero and it
creates a split store which is inefficient.

bin/Debug/llc -O2 -mtriple=x86_64-apple-darwin12 
    .section    __TEXT,__text,regular,pure_instructions
    .globl    _foo
    .align    4, 0x90
_foo:                                   ## @foo
    .cfi_startproc
## BB#0:
    vxorps    %xmm0, %xmm0, %xmm0
    vextractf128    $1, %ymm0, %xmm1   <-------- bah
    vmovups    %xmm1, 16(%rdi)            <-------- bah
    vmovups    %xmm0, (%rdi)
    vzeroupper
    ret</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>