[llvm-bugs] [Bug 34666] New: improvement for several broadcast
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Sep 19 04:44:17 PDT 2017
https://bugs.llvm.org/show_bug.cgi?id=34666
Bug ID: 34666
Summary: improvement for several broadcast
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: Scalar Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: Jina.nahias at intel.com
CC: llvm-bugs at lists.llvm.org
For the following C example:
__m512i foo(__m512i __O, __mmask64 __M, char __A){
__m512i res= _mm512_mask_set1_epi8(__O, -1, __A);
__m512i res1= _mm512_mask_set1_epi8(__O, __M, __A);
__m512i res2= _mm512_maskz_set1_epi8( __M, __A);
__m512i res3= res+res1+res2;
return res3;
}
LLVM currently generates the following IR:
define <8 x i64> @foo(<8 x i64> %__O, i64 %__M, i8 signext %__A) {
%vecinit.i.i = insertelement <64 x i8> undef, i8 %__A, i32 0
%vecinit63.i.i = shufflevector <64 x i8> %vecinit.i.i, <64 x i8> undef, <64 x
i32> zeroinitializer
%0 = bitcast <64 x i8> %vecinit63.i.i to <8 x i64>
%1 = bitcast <8 x i64> %__O to <64 x i8>
%2 = bitcast i64 %__M to <64 x i1>
%3 = select <64 x i1> %2, <64 x i8> %vecinit63.i.i, <64 x i8> %1
%4 = bitcast <64 x i8> %3 to <8 x i64>
%5 = select <64 x i1> %2, <64 x i8> %vecinit63.i.i, <64 x i8> zeroinitializer
%6 = bitcast <64 x i8> %5 to <8 x i64>
%add = add <8 x i64> %4, %0
%add3 = add <8 x i64> %add, %6
ret <8 x i64> %add3
}
which gets translated to this assembly:
vpbroadcastb %esi, %zmm1
kmovq %rdi, %k1
vpbroadcastb %esi, %zmm0 {%k1}
vpaddq %zmm1, %zmm0, %zmm0
vpbroadcastb %esi, %zmm1 {%k1} {z}
vpaddq %zmm1, %zmm0, %zmm0
retq
If we could instead generate the following IR:
define <64 x i8> @foo(i8 %a, <64 x i8> %passthru, i64 %mask) {
%m = bitcast i64 %mask to <64 x i1>
%add0 = add i8 %a, %a
%add1 = add i8 %add0, %add0
%v0 = insertelement <64 x i8> undef, i8 %add1, i32 0
%v1 = shufflevector <64 x i8> %v0, <64 x i8> undef, <64 x i32>
zeroinitializer
%w0 = insertelement <64 x i8> undef, i8 %a, i32 0
%w1 = shufflevector <64 x i8> %w0, <64 x i8> undef, <64 x i32>
zeroinitializer
%add2 = add <64 x i8> %w1, %passthru
%v2 = select <64 x i1> %m, <64 x i8> %v1, <64 x i8> %add2
ret <64 x i8> %v2
}
we would get this assembly:
kmovq %rsi, %k1
vpbroadcastb %edi, %zmm1
shlb $2, %dil
vpaddb %zmm0, %zmm1, %zmm0
vpbroadcastb %edi, %zmm0 {%k1}
which saves one broadcast instruction (two vpbroadcastb instead of three).
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170919/657316a5/attachment-0001.html>
More information about the llvm-bugs
mailing list