[llvm-bugs] [Bug 49296] New: [Regression] Suboptimal loop exit codegen

via llvm-bugs llvm-bugs at lists.llvm.org
Sat Feb 20 00:51:27 PST 2021


https://bugs.llvm.org/show_bug.cgi?id=49296

            Bug ID: 49296
           Summary: [Regression] Suboptimal loop exit codegen
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedbugs at nondot.org
          Reporter: david.bolvansky at gmail.com
                CC: llvm-bugs at lists.llvm.org

#include <immintrin.h>
#include <stdint.h>

typedef uint32_t u32v8 __attribute__((vector_size(32)));
/*
 * Reproducer for the loop-exit codegen regression reported here.
 *
 * Reads 32 vectors of eight uint32_t lanes (32 bytes each, 1024 bytes in
 * total) from `input` and writes `in | (in >> 4)`, computed per lane, to
 * `out`.
 *
 * out:   destination buffer; 1024 bytes are written through it.
 * input: source buffer; 1024 bytes are read through it.
 *
 * NOTE(review): both pointers are dereferenced as u32v8, and the listings
 * in this report use aligned vmovdqa accesses, so callers presumably must
 * pass 32-byte-aligned buffers -- confirm before reusing this elsewhere.
 */
void double_load(int8_t *__restrict out, const int8_t *__restrict input)
{
    /* Reinterpret the byte buffers as vectors; this cast drops the const
     * qualifier from `input`, but the function only reads through vin. */
    u32v8 *vin = (u32v8 *)input;
    u32v8 *vout = (u32v8 *)out;
    /* `i` is only a trip counter (0, 32, ..., 992 => 32 iterations); the
     * data is addressed through the post-incremented pointers. How this
     * exit test is compiled is the subject of the report, so keep the loop
     * in exactly this form. */
    for (unsigned i=0 ; i<1024 ; i+=32){
        u32v8 in = *vin++;
        /* Lanes are unsigned, so >> is a logical shift (vpsrld in the asm). */
        *vout++ = in | (in >> 4);
    }
}


Flags: -O3 -mavx2 -fno-unroll-loops

LLVM 10 and newer:
double_load(signed char*, signed char const*):                   # @double_load(signed char*, signed char const*)
        xorl    %eax, %eax
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        vmovdqa (%rsi,%rax), %ymm0
        vpsrld  $4, %ymm0, %ymm1
        vpor    %ymm0, %ymm1, %ymm0
        vmovdqa %ymm0, (%rdi,%rax)
        addq    $32, %rax
        leal    -32(%rax), %ecx
        cmpl    $992, %ecx              # imm = 0x3E0
        jb      .LBB0_1
        vzeroupper
        retq


LLVM 9:
double_load(signed char*, signed char const*):                   # @double_load(signed char*, signed char const*)
        xorl    %eax, %eax
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        vmovdqa (%rsi,%rax), %ymm0
        vpsrld  $4, %ymm0, %ymm1
        vpor    %ymm0, %ymm1, %ymm0
        vmovdqa %ymm0, (%rdi,%rax)
        addq    $32, %rax
        cmpl    $1024, %eax             # imm = 0x400
        jb      .LBB0_1
        vzeroupper
        retq

LLVM IR comparison:
LLVM 10+:
  %15 = add nuw nsw i32 %8, 32
  %16 = icmp ult i32 %8, 992

LLVM 9:
  %15 = add nuw nsw i32 %8, 32
  %16 = icmp ult i32 %15, 1024



Codegen: https://godbolt.org/z/GzaTPj

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20210220/4f8240b4/attachment.html>


More information about the llvm-bugs mailing list