[LLVMbugs] [Bug 19710] New: LLVM pessimizes simple array comparison loop

bugzilla-daemon at llvm.org bugzilla-daemon at llvm.org
Sun May 11 08:47:32 PDT 2014


            Bug ID: 19710
           Summary: LLVM pessimizes simple array comparison loop
           Product: libraries
           Version: trunk
          Hardware: All
                OS: All
            Status: NEW
          Severity: normal
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedbugs at nondot.org
          Reporter: st at quanttec.com
                CC: llvmbugs at cs.uiuc.edu
    Classification: Unclassified

The current trunk version of clang/LLVM pessimizes the loop in the following
example (by unnecessarily introducing a new loop induction variable):

// Lexicographic "less than" for two int arrays.
//
// Compares the first min(length1, length2) elements pairwise; the first
// differing pair decides the result. If that common prefix is equal, the
// shorter array orders first.
//
//   p1, length1  first array and its element count
//   p2, length2  second array and its element count
//   returns      true if the first array orders before the second
bool arrayLess(const int* p1, long length1,
               const int* p2, long length2)
{
  const long minLength = length1 <= length2 ? length1 : length2;
  if (minLength > 0) {
    // minLength > 0 here, so the do/while body safely runs at least once.
    const int* const end = p1 + minLength;
    do {
      if (*p1 != *p2) {
        return *p1 < *p2;
      }
      // Advance both cursors in lock-step; only p1 is tested against end.
      ++p1;
      ++p2;
    } while (p1 != end);
  }
  return length1 < length2;
}

clang++ -O3 -S test.cpp yields the following assembly:

# bool arrayLess(const int* p1, long length1, const int* p2, long length2)
# clang -O3 output, x86-64, AT&T syntax, System V AMD64 argument registers:
#   In:  %rdi = p1, %rsi = length1, %rdx = p2, %rcx = length2
#   Out: %al  = result
# %r9 is the additional loop counter the report is about: it is set up in
# %if.then and counted down by 4 on every iteration.
_Z9arrayLessPKilS0_l:                   # @_Z9arrayLessPKilS0_l
# BB#0:                                 # %entry
    cmpq    %rcx, %rsi
    movq    %rcx, %rax
    cmovleq    %rsi, %rax               # %rax = min(length1, length2)
    testq    %rax, %rax
    jle    .LBB0_4                      # minLength <= 0: skip the loop
# BB#1:                                 # %if.then
    movq    %rcx, %rax
    notq    %rax                        # %rax = ~length2
    movq    %rsi, %r8
    notq    %r8                         # %r8  = ~length1
    cmpq    %r8, %rax
    cmovgeq    %rax, %r8                # %r8 = max(~length1, ~length2) = ~minLength
    shlq    $2, %r8                     # %r8 = -4*minLength - 4
    movq    $-4, %r9
    subq    %r8, %r9                    # %r9 = 4*minLength (bytes left to compare)
    .align    16, 0x90
.LBB0_2:                                # %do.body
                                        # =>This Inner Loop Header: Depth=1
    movl    (%rdx), %eax
    cmpl    %eax, (%rdi)                # flags from *p1 - *p2
    jne    .LBB0_5                      # elements differ: setl yields *p1 < *p2
# BB#3:                                 # %if.end
                                        #   in Loop: Header=BB0_2 Depth=1
    addq    $4, %rdi                    # ++p1
    addq    $4, %rdx                    # ++p2
    addq    $-4, %r9                    # count down the separate byte counter
    jne    .LBB0_2
.LBB0_4:                                # %if.end7
    cmpq    %rcx, %rsi                  # flags from length1 - length2
.LBB0_5:                                # %return
    setl    %al                         # signed "less" from whichever compare ran last
    retq
.Ltmp0:
    .size    _Z9arrayLessPKilS0_l, .Ltmp0-_Z9arrayLessPKilS0_l

For comparison, GCC generates the following assembly:

    # GCC output for arrayLess, x86-64, AT&T syntax, System V AMD64 registers:
    #   In:  %rdi = p1, %rsi = length1, %rdx = p2, %rcx = length2
    #   Out: %al  = result
    # The loop is bounded by comparing %rdi against the end pointer in %r9;
    # no separate counter register is maintained.
    cmpq    %rcx, %rsi
    movq    %rcx, %rax
    cmovle    %rsi, %rax                # %rax = min(length1, length2)
    testq    %rax, %rax
    jle    .L2                          # minLength <= 0: skip the loop
    leaq    (%rdi,%rax,4), %r9          # %r9 = end = p1 + minLength
    jmp    .L5                          # enter the loop at the element compare
    .p2align 4,,10
    .p2align 3
.L3:
    addq    $4, %rdi                    # ++p1
    addq    $4, %rdx                    # ++p2
    cmpq    %r9, %rdi
    je    .L2                           # p1 == end: common prefix is equal
.L5:
    movl    (%rdi), %eax
    movl    (%rdx), %r8d
    cmpl    %r8d, %eax
    je    .L3                           # elements equal: advance
    cmpl    %eax, %r8d
    setg    %al                         # *p2 > *p1, i.e. *p1 < *p2
    ret
    .p2align 4,,10
    .p2align 3
.L2:
    cmpq    %rcx, %rsi
    setl    %al                         # length1 < length2
    ret

