<html>
    <head>
      <base href="http://llvm.org/bugs/" />
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - missing load widening"
   href="http://llvm.org/bugs/show_bug.cgi?id=18667">18667</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>missing load widening
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Common Code Generator Code
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>nlewycky@google.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvmbugs@cs.uiuc.edu
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>In code like this:

struct {
  unsigned a : 24;
  unsigned char b;
} S;
int test() {
  return S.a;
}

we can perform a 4-byte load to read 'a', though not a 4-byte store. The clang
-O0 IR for this is:

%struct.anon = type { [3 x i8], i8 }

@S = common global %struct.anon zeroinitializer, align 4

define i32 @test() nounwind uwtable {
entry:
  %bf.load = load i24* bitcast (%struct.anon* @S to i24*), align 4
  %bf.cast = zext i24 %bf.load to i32
  ret i32 %bf.cast
}

which has all the information necessary; a load with align K can load a minimum
of K bytes. Here's what llc does to that IR:

# BB#0:                                 # %entry
        movzbl  S+2(%rip), %ecx
        shll    $16, %ecx
        movzwl  S(%rip), %eax
        orl     %ecx, %eax
        retq

That's a 2-byte load and a 1-byte load. I think a 4-byte load then mask (for
zext) would be better. For example, gcc's output:

test:
.LFB0:
        .cfi_startproc
        movl    S(%rip), %eax
        andl    $16777215, %eax
        ret</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>