<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - __builtin_align_down() - some opts missing"
   href="https://bugs.llvm.org/show_bug.cgi?id=44448">44448</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>__builtin_align_down() - some opts missing
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: X86
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>lebedev.ri@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>craig.topper@gmail.com, llvm-bugs@lists.llvm.org, llvm-dev@redking.me.uk, spatel+llvm@rotateright.com
          </td>
        </tr></table>
      <p>
        <div>
        <pre>The `__builtin_align_down()` is the naive, sane version, but it results in
suboptimal assembly.
The `__builtin_align_down_GOOD()` results in optimal assembly,
identical to `__builtin_align_down_UNFRIENDLY()`.

I'm not really sure what folds are missing.
I would say that `@_Z20__builtin_align_downPcm()` is IR-canonical,
while `@_Z25__builtin_align_down_GOODPcm()` isn't,
even though the final assembly is optimal for the 'non-canonical' one.

(1) Their difference is this IR-level fold: <a href="https://rise4fun.com/Alive/sltV">https://rise4fun.com/Alive/sltV</a>
^ should we be hoisting that IR `neg`, or sinking it?

Name: __builtin_align_down
  %neg = sub i64 0, %alignment
  %and = and i64 %0, %neg
  %r = sub i64 %and, %0
=>
  %neg.not = add i64 %alignment, -1
  %sub1 = and i64 %neg.not, %0
  %r = sub i64 0, %sub1


(2) In backend, we are clearly missing the following fold:
<a href="https://rise4fun.com/Alive/ZVdp">https://rise4fun.com/Alive/ZVdp</a>

Name: ptr - (ptr & (alignment-1))  ->  ptr & (0 - alignment)
  %mask = add i64 %alignment, -1
  %bias = and i64 %ptr, %mask
  %r = sub i64 %ptr, %bias
=>
  %highbitmask = sub i64 0, %alignment
  %r = and i64 %ptr, %highbitmask

Not sure if there is a more general fold than that missing?


<a href="https://godbolt.org/z/GBAsiB">https://godbolt.org/z/GBAsiB</a>

#include &lt;cstdint&gt;
#include &lt;cstddef&gt;

char* __attribute__((alloc_align(2)))
__builtin_align_down(char* maybe_overbiased_ptr, size_t alignment) {
    size_t mask = alignment - 1;
    uintptr_t maybe_overbiased_intptr = (uintptr_t)maybe_overbiased_ptr;
    uintptr_t aligned_intptr = maybe_overbiased_intptr & (~mask);
    uintptr_t bias = maybe_overbiased_intptr - aligned_intptr;
    return maybe_overbiased_ptr - bias;
}

char* __attribute__((alloc_align(2)))
__builtin_align_down_GOOD(char* maybe_overbiased_ptr, size_t alignment) {
    size_t mask = alignment - 1;
    uintptr_t maybe_overbiased_intptr = (uintptr_t)maybe_overbiased_ptr;
    uintptr_t aligned_intptr = maybe_overbiased_intptr & (~mask);
    uintptr_t bias = aligned_intptr - maybe_overbiased_intptr;
    return maybe_overbiased_ptr + bias;
}

char* __attribute__((alloc_align(2)))
__builtin_align_up(char* maybe_underbiased_ptr, size_t alignment) {
    size_t mask = alignment - 1;
    uintptr_t maybe_underbiased_intptr = (uintptr_t)maybe_underbiased_ptr;
    uintptr_t aligned_biased_intptr = maybe_underbiased_intptr + mask;
    uintptr_t aligned_intptr = aligned_biased_intptr & (~mask);
    uintptr_t bias = aligned_intptr - maybe_underbiased_intptr;
    return maybe_underbiased_ptr + bias;
}

char* __attribute__((alloc_align(2)))
__builtin_align_down_UNFRIENDLY(char* maybe_overbiased_ptr, size_t alignment) {
    size_t mask = alignment - 1;
    uintptr_t maybe_overbiased_intptr = (uintptr_t)maybe_overbiased_ptr;
    uintptr_t aligned_intptr = maybe_overbiased_intptr & (~mask);
    return (char*)aligned_intptr;
}

char* __attribute__((alloc_align(2)))
__builtin_align_up_UNFRIENDLY(char* maybe_underbiased_ptr, size_t alignment) {
    size_t mask = alignment - 1;
    uintptr_t maybe_underbiased_intptr = (uintptr_t)maybe_underbiased_ptr;
    uintptr_t aligned_biased_intptr = maybe_underbiased_intptr + mask;
    uintptr_t aligned_intptr = aligned_biased_intptr & (~mask);
    return (char*)aligned_intptr;
}

define dso_local i8* @_Z20__builtin_align_downPcm(i8* %maybe_overbiased_ptr, i64 %alignment) local_unnamed_addr #0 {
  %0 = ptrtoint i8* %maybe_overbiased_ptr to i64
  %neg.not = add i64 %alignment, -1
  %sub1 = and i64 %neg.not, %0
  %idx.neg = sub i64 0, %sub1
  %add.ptr = getelementptr inbounds i8, i8* %maybe_overbiased_ptr, i64 %idx.neg
  ret i8* %add.ptr
}
define dso_local i8* @_Z25__builtin_align_down_GOODPcm(i8* %maybe_overbiased_ptr, i64 %alignment) local_unnamed_addr #0 {
  %0 = ptrtoint i8* %maybe_overbiased_ptr to i64
  %neg = sub i64 0, %alignment
  %and = and i64 %0, %neg
  %sub1 = sub i64 %and, %0
  %add.ptr = getelementptr inbounds i8, i8* %maybe_overbiased_ptr, i64 %sub1
  ret i8* %add.ptr
}

__builtin_align_down(char*, unsigned long):            # @__builtin_align_down(char*, unsigned long)
        dec     rsi
        mov     rax, rdi
        and     rsi, rdi
        sub     rax, rsi
        ret
__builtin_align_down_GOOD(char*, unsigned long):       # @__builtin_align_down_GOOD(char*, unsigned long)
        mov     rax, rsi
        neg     rax
        and     rax, rdi
        ret
__builtin_align_up(char*, unsigned long):              # @__builtin_align_up(char*, unsigned long)
        lea     rax, [rsi + rdi - 1]
        neg     rsi
        and     rax, rsi
        ret


__builtin_align_down_UNFRIENDLY(char*, unsigned long): # @__builtin_align_down_UNFRIENDLY(char*, unsigned long)
        mov     rax, rsi
        neg     rax
        and     rax, rdi
        ret
__builtin_align_up_UNFRIENDLY(char*, unsigned long):   # @__builtin_align_up_UNFRIENDLY(char*, unsigned long)
        lea     rax, [rsi + rdi - 1]
        neg     rsi
        and     rax, rsi
        ret</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>