[llvm-bugs] [Bug 49296] New: [Regression] Suboptimal loop exit codegen
via llvm-bugs
llvm-bugs at lists.llvm.org
Sat Feb 20 00:51:27 PST 2021
https://bugs.llvm.org/show_bug.cgi?id=49296
Bug ID: 49296
Summary: [Regression] Suboptimal loop exit codegen
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Loop Optimizer
Assignee: unassignedbugs at nondot.org
Reporter: david.bolvansky at gmail.com
CC: llvm-bugs at lists.llvm.org
#include <immintrin.h>
#include <stdint.h>
typedef uint32_t u32v8 __attribute__((vector_size(32)));
/*
 * Reproducer for the loop-exit codegen regression described in this report.
 *
 * Reads 32 consecutive u32v8 vectors (32 bytes each, 1024 bytes in total)
 * from `input` and stores `v | (v >> 4)` for each one to `out`; the shift is
 * applied independently to every 32-bit unsigned element of the vector.
 *
 * out   - destination buffer, written as u32v8 elements.
 * input - source buffer, read as u32v8 elements.
 *
 * NOTE(review): both pointers are reinterpreted as u32v8 *, so callers
 * presumably must pass 32-byte-aligned buffers of at least 1024 bytes (the
 * listings in this report use aligned vmovdqa loads/stores) - confirm.
 */
void double_load(int8_t *__restrict out, const int8_t *__restrict input)
{
/* View the byte buffers as arrays of 8 x uint32_t vectors. */
u32v8 *vin = (u32v8 *)input;
u32v8 *vout = (u32v8 *)out;
/*
 * `i` is only a trip counter: it advances by 32 up to 1024, giving 32
 * iterations. Addressing is done through the vin/vout post-increments.
 * The exit comparison on `i` is what the report is about.
 */
for (unsigned i=0 ; i<1024 ; i+=32){
u32v8 in = *vin++;
/* OR each element with itself shifted right by 4 bits. */
*vout++ = in | (in >> 4);
}
}
Flags: -O3 -mavx2 -fno-unroll-loops
LLVM 10 and newer:
double_load(signed char*, signed char const*): # @double_load(signed char*, signed char const*)
xorl %eax, %eax
.LBB0_1: # =>This Inner Loop Header: Depth=1
vmovdqa (%rsi,%rax), %ymm0
vpsrld $4, %ymm0, %ymm1
vpor %ymm0, %ymm1, %ymm0
vmovdqa %ymm0, (%rdi,%rax)
addq $32, %rax
leal -32(%rax), %ecx
cmpl $992, %ecx # imm = 0x3E0
jb .LBB0_1
vzeroupper
retq
LLVM 9:
double_load(signed char*, signed char const*): # @double_load(signed char*, signed char const*)
xorl %eax, %eax
.LBB0_1: # =>This Inner Loop Header: Depth=1
vmovdqa (%rsi,%rax), %ymm0
vpsrld $4, %ymm0, %ymm1
vpor %ymm0, %ymm1, %ymm0
vmovdqa %ymm0, (%rdi,%rax)
addq $32, %rax
cmpl $1024, %eax # imm = 0x400
jb .LBB0_1
vzeroupper
retq
LLVM IR comparison:
LLVM 10+:
%15 = add nuw nsw i32 %8, 32
%16 = icmp ult i32 %8, 992
LLVM 9:
%15 = add nuw nsw i32 %8, 32
%16 = icmp ult i32 %15, 1024
Codegen: https://godbolt.org/z/GzaTPj
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20210220/4f8240b4/attachment.html>
More information about the llvm-bugs
mailing list