[llvm-bugs] [Bug 31719] New: LLVM maintain useless state when using adc

Sun Jan 22 17:30:27 PST 2017

https://llvm.org/bugs/show_bug.cgi?id=31719

            Bug ID: 31719
           Summary: LLVM maintain useless state when using adc
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: normal
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: deadalnix at gmail.com
                CC: llvm-bugs at lists.llvm.org
    Classification: Unclassified

Sample IR (optimized):

; @foo: limb-wise addition of two 4 x i64 values with carry propagation.
;
; One operand is read through %this (four i64 loads at indices 0..3), the
; other is passed by value in %arg.b. Each limb pair is zero-extended to i128
; and added; the low 64 bits (trunc) are the result limb and the high 64 bits
; (lshr 64) are the carry fed into the next limb. The carry out of the last
; limb is not computed: only the truncated sum is kept.
;
; NOTE(review): %scalar is not defined in this excerpt. The extractvalue /
; getelementptr / insertvalue uses below are consistent with
; `%scalar = type { [4 x i64] }` -- confirm against the original module.
;
; Function Attrs: norecurse nounwind readonly
define %scalar @foo(%scalar* nocapture readonly %this, %scalar %arg.b) local_unnamed_addr #1 {
entry:
  ; Split the by-value operand into its four limbs.
  %0 = extractvalue %scalar %arg.b, 0
  %.elt = extractvalue [4 x i64] %0, 0
  %.elt24 = extractvalue [4 x i64] %0, 1
  %.elt26 = extractvalue [4 x i64] %0, 2
  %.elt28 = extractvalue [4 x i64] %0, 3

  ; Limb 0: no carry-in. %6 = low 64 bits of the sum, %7 = carry-out.
  %1 = getelementptr inbounds %scalar, %scalar* %this, i64 0, i32 0, i64 0
  %2 = load i64, i64* %1, align 8
  %3 = zext i64 %2 to i128
  %4 = zext i64 %.elt to i128
  %5 = add nuw nsw i128 %3, %4
  %6 = trunc i128 %5 to i64
  %7 = lshr i128 %5, 64

  ; Limb 1: add the two limbs, then the carry %7. %14 = result, %15 = carry-out.
  %8 = getelementptr inbounds %scalar, %scalar* %this, i64 0, i32 0, i64 1
  %9 = load i64, i64* %8, align 8
  %10 = zext i64 %9 to i128
  %11 = zext i64 %.elt24 to i128
  %12 = add nuw nsw i128 %10, %11
  %13 = add nuw nsw i128 %12, %7
  %14 = trunc i128 %13 to i64
  %15 = lshr i128 %13, 64

  ; Limb 2: same pattern with carry-in %15. %22 = result, %23 = carry-out.
  %16 = getelementptr inbounds %scalar, %scalar* %this, i64 0, i32 0, i64 2
  %17 = load i64, i64* %16, align 8
  %18 = zext i64 %17 to i128
  %19 = zext i64 %.elt26 to i128
  %20 = add nuw nsw i128 %18, %19
  %21 = add nuw nsw i128 %20, %15
  %22 = trunc i128 %21 to i64
  %23 = lshr i128 %21, 64

  ; Limb 3: carry-in %23; only the low 64 bits are kept (no carry-out).
  %24 = getelementptr inbounds %scalar, %scalar* %this, i64 0, i32 0, i64 3
  %25 = load i64, i64* %24, align 8
  %26 = zext i64 %25 to i128
  %27 = zext i64 %.elt28 to i128
  %28 = add nuw nsw i128 %26, %27
  %29 = add nuw nsw i128 %28, %23
  %30 = trunc i128 %29 to i64

  ; Reassemble the four result limbs into the return aggregate. The aggregate
  ; type must be %scalar so that it matches the function's return type.
  %31 = insertvalue [4 x i64] undef, i64 %6, 0
  %32 = insertvalue [4 x i64] %31, i64 %14, 1
  %33 = insertvalue [4 x i64] %32, i64 %22, 2
  %34 = insertvalue [4 x i64] %33, i64 %30, 3
  %35 = insertvalue %scalar undef, [4 x i64] %34, 0
  ret %scalar %35
}

; Attribute group #0 is not referenced by any line of this excerpt.
attributes #0 = { norecurse nounwind readnone }
; Attribute group #1 is the group attached to the definition of @foo.
attributes #1 = { norecurse nounwind readonly }

Codegen:

# foo -- x86-64 code (AT&T syntax: op src, dst) reported for the IR function.
#
# Register roles as visible in the instructions below:
#   %rsi                    base address of the four 64-bit limbs that are loaded
#   %rdx, %rcx, %r8, %r9    limbs 0..3 of the other operand, updated in place
#   %rdi                    base address the four result limbs are stored to;
#                           also copied to %rax before returning
#   %r10, %r11, %rax        scratch used to hold carries as 0/1 integers
#
# Each carry is turned into an integer with `sbb reg, reg` (reg = -CF, i.e.
# 0 or all ones) followed by `and $1` (0 or 1), and is then added to the next
# limb with a separate add. This is the "useless state" the report is about.
foo:
addq    (%rsi), %rdx                # limb 0: rdx += mem[rsi]; CF = carry-out
    sbbq    %r10, %r10              # r10 = -CF (0 or all ones)
    andl    $1, %r10d               # r10 = carry out of limb 0 as 0/1
    addq    8(%rsi), %rcx           # limb 1: rcx += mem[rsi+8]; CF = carry-out
    sbbq    %r11, %r11              # r11 = -CF
    andl    $1, %r11d               # r11 = carry from the limb-1 add as 0/1
    addq    %r10, %rcx              # add the carry-in from limb 0
    adcq    $0, %r11                # r11 += carry produced by adding the carry-in
    addq    16(%rsi), %r8           # limb 2: r8 += mem[rsi+16]; CF = carry-out
    sbbq    %rax, %rax              # rax = -CF
    andl    $1, %eax                # rax = carry from the limb-2 add as 0/1
    addq    %r11, %r8               # add the carry-in from limb 1
    adcq    $0, %rax                # rax += carry produced by adding the carry-in
    addq    24(%rsi), %r9           # limb 3: r9 += mem[rsi+24]; carry-out unused
    addq    %rax, %r9               # add the carry-in from limb 2
    movq    %rdx, (%rdi)            # store result limb 0
    movq    %rcx, 8(%rdi)           # store result limb 1
    movq    %r8, 16(%rdi)           # store result limb 2
    movq    %r9, 24(%rdi)           # store result limb 3
    movq    %rdi, %rax              # return the destination address in rax
    retq

While LLVM is able to use the adc instruction (good), it is unclear why it
accumulates the carry in RAX and then adds RAX, rather than applying ADC to the
operands right away. See for instance:

    adcq    $0, %rax
    addq    24(%rsi), %r9
    addq    %rax, %r9

This uses adc to store the carry in RAX and then performs 2 additions, when it
could simply do

    adcq    24(%rsi), %r9

These routines are at the core of various crypto libraries and need to be fast.
Any chance of getting better codegen here?

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170123/a349e33f/attachment.html>