<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Missing optimization: SUBCARRY (SBB) not used"

   href="https://bugs.llvm.org/show_bug.cgi?id=49225">49225</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Missing optimization: SUBCARRY (SBB) not used

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Common Code Generator Code

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>chfast@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>This reports an issue where a sequence of additions with carry propagation is

nicely optimized to ADDCARRY nodes / ADC instructions, while the symmetric code

for subtraction is not optimized to SUBCARRY / SBB.

The reason may be that in the DAG combiner the combineCarryDiamond() and/or the

combineADDCARRYDiamond() is not properly specified for the SUB case, or that

canonicalization prevents the SUB case from being recognized (you can see that the

last SUB in the sequence is converted to ADD).

<a href="https://godbolt.org/z/7cr1o9">https://godbolt.org/z/7cr1o9</a>

using uint64_t = unsigned long;

bool uaddo(uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh, uint64_t* ol,

uint64_t* oh) noexcept

{

    uint64_t l = al + bl;

    bool k0 = l &lt; al;

    uint64_t t = ah + bh;

    bool k1 = t &lt; ah;

    uint64_t h = t + k0;

    bool k2 = h &lt; t;

    bool k = k1 || k2;

    *ol = l;

    *oh = h;

    return k;

}

bool usubo(uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh, uint64_t* ol,

uint64_t* oh) noexcept

{

    uint64_t l = al - bl;

    bool k0 = l > al;

    uint64_t t = ah - bh;

    bool k1 = t > ah;

    uint64_t h = t - k0;

    bool k2 = h > t;

    bool k = k1 || k2;

    *ol = l;

    *oh = h;

    return k;

}

define dso_local zeroext i1 @_Z5uaddommmmPmS_(i64 %0, i64 %1, i64 %2, i64 %3,

i64* nocapture %4, i64* nocapture %5) local_unnamed_addr #0 {

  %7 = add i64 %2, %0

  %8 = icmp ult i64 %7, %0

  %9 = add i64 %3, %1

  %10 = icmp ult i64 %9, %1

  %11 = zext i1 %8 to i64

  %12 = add i64 %9, %11

  %13 = icmp ult i64 %12, %9

  %14 = or i1 %10, %13

  store i64 %7, i64* %4, align 8, !tbaa !2

  store i64 %12, i64* %5, align 8, !tbaa !2

  ret i1 %14

}

define dso_local zeroext i1 @_Z5usubommmmPmS_(i64 %0, i64 %1, i64 %2, i64 %3,

i64* nocapture %4, i64* nocapture %5) local_unnamed_addr #0 {

  %7 = sub i64 %0, %2

  %8 = icmp ult i64 %0, %2

  %9 = sub i64 %1, %3

  %10 = icmp ult i64 %1, %3

  %11 = sext i1 %8 to i64

  %12 = add i64 %9, %11

  %13 = icmp ugt i64 %12, %9

  %14 = or i1 %10, %13

  store i64 %7, i64* %4, align 8, !tbaa !2

  store i64 %12, i64* %5, align 8, !tbaa !2

  ret i1 %14

}

attributes #0 = { nofree norecurse nounwind uwtable willreturn writeonly

mustprogress "disable-tail-calls"="false" "frame-pointer"="none"

"less-precise-fpmad"="false" "min-legal-vector-width"="0"

"no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false"

"no-signed-zeros-fp-math"="false" "no-trapping-math"="true"

"stack-protector-buffer-size"="8" "target-cpu"="x86-64"

"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic"

"unsafe-fp-math"="false" "use-soft-float"="false" }

uaddo(unsigned long, unsigned long, unsigned long, unsigned long, unsigned

long*, unsigned long*):                       # @uaddo(unsigned long, unsigned

long, unsigned long, unsigned long, unsigned long*, unsigned long*)

        add     rdi, rdx

        adc     rsi, rcx

        setb    al

        mov     qword ptr [r8], rdi

        mov     qword ptr [r9], rsi

        ret

usubo(unsigned long, unsigned long, unsigned long, unsigned long, unsigned

long*, unsigned long*):                       # @usubo(unsigned long, unsigned

long, unsigned long, unsigned long, unsigned long*, unsigned long*)

        sub     rsi, rcx

        setb    cl

        sub     rdi, rdx

        mov     rdx, rsi

        sbb     rdx, 0

        cmp     rdx, rsi

        seta    al

        or      al, cl

        mov     qword ptr [r8], rdi

        mov     qword ptr [r9], rdx

        ret</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>