[llvm-bugs] [Bug 34209] New: Poor extension of extracted integer from vectors
via llvm-bugs
llvm-bugs at lists.llvm.org
Wed Aug 16 04:11:54 PDT 2017
https://bugs.llvm.org/show_bug.cgi?id=34209
Bug ID: 34209
Summary: Poor extension of extracted integer from vectors
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: llvm-dev at redking.me.uk
CC: llvm-bugs at lists.llvm.org, spatel+llvm at rotateright.com
In the gather+swizzle+store pattern below we are extracting the <4 x i32>
indices as a <2 x i64> and then splitting each element into <2 x i32>.
This is fine and might reduce the pressure on the fpu-integer pipe, but we are
performing an ASHR+MOV to extract the upper i32 bits instead of just a LSHR.
Hopefully this is just missing a DAGCombine pattern, but I haven't
investigated.
https://godbolt.org/g/JAUnqy
#include <x86intrin.h>
#include <stdint.h>
// Reproducer: reads four floats from `buffer` at the positions given by the
// four 32-bit lanes of *indices, then writes them to buffer[0..3].
// Every indexed read is performed before any of the writes.
void gather_swizzle(const __m128i *indices, float *buffer)
{
// `indices` is a by-value parameter that is not read again below, so the
// post-increment has no further effect inside this function.
__m128i idx = *indices++;
// Pull out each 32-bit lane and cast it to uint32_t, so the lane is used as
// an unsigned index.
uint32_t idx0 = static_cast<uint32_t>(_mm_extract_epi32(idx, 0));
uint32_t idx1 = static_cast<uint32_t>(_mm_extract_epi32(idx, 1));
uint32_t idx2 = static_cast<uint32_t>(_mm_extract_epi32(idx, 2));
uint32_t idx3 = static_cast<uint32_t>(_mm_extract_epi32(idx, 3));
// Gather: read all four indexed elements first...
float sum0 = buffer[idx0];
float sum1 = buffer[idx1];
float sum2 = buffer[idx2];
float sum3 = buffer[idx3];
// ...then overwrite the first four elements of buffer with them.
buffer[0] = sum0;
buffer[1] = sum1;
buffer[2] = sum2;
buffer[3] = sum3;
}
llvm -mcpu=btver2 -mtriple=x86_64-unknown
; IR for gather_swizzle. %0 points at the index vector, %1 at the float buffer.
; Each <4 x i32> lane is zero-extended to i64, used to address a float in %1
; (moved as an i32 bit pattern), and the four loaded values are then stored to
; elements 0..3 of %1.
define void @gather_swizzle(<2 x i64>* nocapture readonly, float* nocapture) local_unnamed_addr #0 {
; Reinterpret the <2 x i64> pointer as <4 x i32> and load the four indices.
%3 = bitcast <2 x i64>* %0 to <4 x i32>*
%4 = load <4 x i32>, <4 x i32>* %3, align 16
%5 = extractelement <4 x i32> %4, i32 0
%6 = extractelement <4 x i32> %4, i32 1
%7 = extractelement <4 x i32> %4, i32 2
%8 = extractelement <4 x i32> %4, i32 3
; Lane 0: zero-extend the index, address the float, load it as i32.
%9 = zext i32 %5 to i64
%10 = getelementptr inbounds float, float* %1, i64 %9
%11 = bitcast float* %10 to i32*
%12 = load i32, i32* %11, align 4
; Lane 1.
%13 = zext i32 %6 to i64
%14 = getelementptr inbounds float, float* %1, i64 %13
%15 = bitcast float* %14 to i32*
%16 = load i32, i32* %15, align 4
; Lane 2.
%17 = zext i32 %7 to i64
%18 = getelementptr inbounds float, float* %1, i64 %17
%19 = bitcast float* %18 to i32*
%20 = load i32, i32* %19, align 4
; Lane 3.
%21 = zext i32 %8 to i64
%22 = getelementptr inbounds float, float* %1, i64 %21
%23 = bitcast float* %22 to i32*
%24 = load i32, i32* %23, align 4
; Store the four loaded values to elements 0..3 of %1, in lane order.
%25 = bitcast float* %1 to i32*
store i32 %12, i32* %25, align 4
%26 = getelementptr inbounds float, float* %1, i64 1
%27 = bitcast float* %26 to i32*
store i32 %16, i32* %27, align 4
%28 = getelementptr inbounds float, float* %1, i64 2
%29 = bitcast float* %28 to i32*
store i32 %20, i32* %29, align 4
%30 = getelementptr inbounds float, float* %1, i64 3
%31 = bitcast float* %30 to i32*
store i32 %24, i32* %31, align 4
ret void
}
# Current codegen. Each upper 32-bit lane is extracted with an arithmetic
# shift (sarq) and then zero-extended again by a 32-bit self-move; that
# sarq + movl pair is the redundancy this report is about.
gather_swizzle(long long __vector(2) const*, float*): # @gather_swizzle(long long __vector(2) const*, float*)
vmovdqa (%rdi), %xmm0    # xmm0 = the four 32-bit indices
vpextrq $1, %xmm0, %rax    # rax = upper 64 bits (lanes 3:2)
vmovq %xmm0, %rdx    # rdx = lower 64 bits (lanes 1:0)
movl %eax, %ecx    # rcx = lane 2, zero-extended by the 32-bit move
sarq $32, %rax    # rax = lane 3, sign-extended
movl %edx, %edi    # rdi = lane 0, zero-extended by the 32-bit move
sarq $32, %rdx    # rdx = lane 1, sign-extended
movl %edx, %edx    # zero-extend lane 1, discarding the sign extension
movl %eax, %eax    # zero-extend lane 3, discarding the sign extension
movl (%rsi,%rdi,4), %edi    # edi = buffer[lane 0]
movl (%rsi,%rcx,4), %ecx    # ecx = buffer[lane 2]
movl (%rsi,%rdx,4), %edx    # edx = buffer[lane 1]
movl (%rsi,%rax,4), %eax    # eax = buffer[lane 3]
movl %edi, (%rsi)    # buffer[0]
movl %edx, 4(%rsi)    # buffer[1]
movl %ecx, 8(%rsi)    # buffer[2]
movl %eax, 12(%rsi)    # buffer[3]
retq
Could be:
# Proposed codegen. A logical shift (shrq) leaves the upper lane already
# zero-extended, so no follow-up 32-bit self-move is needed.
gather_swizzle(long long __vector(2) const*, float*): # @gather_swizzle(long long __vector(2) const*, float*)
vmovdqa (%rdi), %xmm0    # xmm0 = the four 32-bit indices
vpextrq $1, %xmm0, %rax    # rax = upper 64 bits (lanes 3:2)
vmovq %xmm0, %rdx    # rdx = lower 64 bits (lanes 1:0)
movl %eax, %ecx    # rcx = lane 2, zero-extended by the 32-bit move
shrq $32, %rax    # rax = lane 3, zero-extended
movl %edx, %edi    # rdi = lane 0, zero-extended by the 32-bit move
shrq $32, %rdx    # rdx = lane 1, zero-extended
movl (%rsi,%rdi,4), %edi    # edi = buffer[lane 0]
movl (%rsi,%rcx,4), %ecx    # ecx = buffer[lane 2]
movl (%rsi,%rdx,4), %edx    # edx = buffer[lane 1]
movl (%rsi,%rax,4), %eax    # eax = buffer[lane 3]
movl %edi, (%rsi)    # buffer[0]
movl %edx, 4(%rsi)    # buffer[1]
movl %ecx, 8(%rsi)    # buffer[2]
movl %eax, 12(%rsi)    # buffer[3]
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170816/58d0fb58/attachment.html>
More information about the llvm-bugs
mailing list