<html>

    <head>

      <base href="https://llvm.org/bugs/" />

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW --- - LLVM maintain useless state when using adc"

   href="https://llvm.org/bugs/show_bug.cgi?id=31719">31719</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>LLVM maintain useless state when using adc

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Windows NT

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>normal

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Backend: X86

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>deadalnix@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr>

        <tr>

          <th>Classification</th>

          <td>Unclassified

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Sample IR (optimized):

; Function Attrs: norecurse nounwind readonly

define %scalar @foo(%scalar* nocapture readonly %this, %scalar %arg.b)

local_unnamed_addr #1 {

entry:

  %0 = extractvalue %scalar %arg.b, 0

  %.elt = extractvalue [4 x i64] %0, 0

  %.elt24 = extractvalue [4 x i64] %0, 1

  %.elt26 = extractvalue [4 x i64] %0, 2

  %.elt28 = extractvalue [4 x i64] %0, 3

  %1 = getelementptr inbounds %scalar , %scalar* %this, i64 0, i32 0, i64 0

  %2 = load i64, i64* %1, align 8

  %3 = zext i64 %2 to i128

  %4 = zext i64 %.elt to i128

  %5 = add nuw nsw i128 %3, %4

  %6 = trunc i128 %5 to i64

  %7 = lshr i128 %5, 64

  %8 = getelementptr inbounds %scalar , %scalar * %this, i64 0, i32 0, i64 1

  %9 = load i64, i64* %8, align 8

  %10 = zext i64 %9 to i128

  %11 = zext i64 %.elt24 to i128

  %12 = add nuw nsw i128 %10, %11

  %13 = add nuw nsw i128 %12, %7

  %14 = trunc i128 %13 to i64

  %15 = lshr i128 %13, 64

  %16 = getelementptr inbounds %scalar , %scalar* %this, i64 0, i32 0, i64 2

  %17 = load i64, i64* %16, align 8

  %18 = zext i64 %17 to i128

  %19 = zext i64 %.elt26 to i128

  %20 = add nuw nsw i128 %18, %19

  %21 = add nuw nsw i128 %20, %15

  %22 = trunc i128 %21 to i64

  %23 = lshr i128 %21, 64

  %24 = getelementptr inbounds %scalar , %scalar* %this, i64 0, i32 0, i64 3

  %25 = load i64, i64* %24, align 8

  %26 = zext i64 %25 to i128

  %27 = zext i64 %.elt28 to i128

  %28 = add nuw nsw i128 %26, %27

  %29 = add nuw nsw i128 %28, %23

  %30 = trunc i128 %29 to i64

  %31 = insertvalue [4 x i64] undef, i64 %6, 0

  %32 = insertvalue [4 x i64] %31, i64 %14, 1

  %33 = insertvalue [4 x i64] %32, i64 %22, 2

  %34 = insertvalue [4 x i64] %33, i64 %30, 3

  %35 = insertvalue %scalar undef, [4 x i64] %34, 0

  ret %scalar %35

}

attributes #0 = { norecurse nounwind readnone }

attributes #1 = { norecurse nounwind readonly }

Codegen:

foo:

    addq    (%rsi), %rdx

    sbbq    %r10, %r10

    andl    $1, %r10d

    addq    8(%rsi), %rcx

    sbbq    %r11, %r11

    andl    $1, %r11d

    addq    %r10, %rcx

    adcq    $0, %r11

    addq    16(%rsi), %r8

    sbbq    %rax, %rax

    andl    $1, %eax

    addq    %r11, %r8

    adcq    $0, %rax

    addq    24(%rsi), %r9

    addq    %rax, %r9

    movq    %rdx, (%rdi)

    movq    %rcx, 8(%rdi)

    movq    %r8, 16(%rdi)

    movq    %r9, 24(%rdi)

    movq    %rdi, %rax

    retq

While LLVM is able to make use of the adc instruction (good), it is unclear

why it materializes the carry in RAX and then adds RAX, rather than using ADC

right away. See for instance:

    adcq    $0, %rax

    addq    24(%rsi), %r9

    addq    %rax, %r9

This uses adc to store the carry in RAX and then performs two additions, when

it could simply do:

    adcq    24(%rsi), %r9

These routines are at the core of various crypto libraries and need to be fast.

Is there any chance of getting better codegen here?</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>