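<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Bug 34666 - improvement for several broadcast</title>
<base href="https://bugs.llvm.org/">
</head>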
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - improvement for several broadcast"
href="https://bugs.llvm.org/show_bug.cgi?id=34666">34666</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>improvement for several broadcast
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Scalar Optimizations
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>Jina.nahias@intel.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<div>
<pre>For the following C example:
__m512i foo(__m512i __O, __mmask64 __M, char __A){
__m512i res= _mm512_mask_set1_epi8(__O, -1, __A);
__m512i res1= _mm512_mask_set1_epi8(__O, __M, __A);
__m512i res2= _mm512_maskz_set1_epi8( __M, __A);
__m512i res3= res+res1+res2;
return res3;
}
LLVM currently generates the following IR:
define &lt;8 x i64&gt; @foo(&lt;8 x i64&gt; %__O, i64 %__M, i8 signext %__A) {
%vecinit.i.i = insertelement &lt;64 x i8&gt; undef, i8 %__A, i32 0
%vecinit63.i.i = shufflevector &lt;64 x i8&gt; %vecinit.i.i, &lt;64 x i8&gt; undef, &lt;64 x i32&gt; zeroinitializer
%0 = bitcast &lt;64 x i8&gt; %vecinit63.i.i to &lt;8 x i64&gt;
%1 = bitcast &lt;8 x i64&gt; %__O to &lt;64 x i8&gt;
%2 = bitcast i64 %__M to &lt;64 x i1&gt;
%3 = select &lt;64 x i1&gt; %2, &lt;64 x i8&gt; %vecinit63.i.i, &lt;64 x i8&gt; %1
%4 = bitcast &lt;64 x i8&gt; %3 to &lt;8 x i64&gt;
%5 = select &lt;64 x i1&gt; %2, &lt;64 x i8&gt; %vecinit63.i.i, &lt;64 x i8&gt; zeroinitializer
%6 = bitcast &lt;64 x i8&gt; %5 to &lt;8 x i64&gt;
%add = add &lt;8 x i64&gt; %4, %0
%add3 = add &lt;8 x i64&gt; %add, %6
ret &lt;8 x i64&gt; %add3
}
which gets translated to this assembly:
vpbroadcastb %esi, %zmm1
kmovq %rdi, %k1
vpbroadcastb %esi, %zmm0 {%k1}
vpaddq %zmm1, %zmm0, %zmm0
vpbroadcastb %esi, %zmm1 {%k1} {z}
vpaddq %zmm1, %zmm0, %zmm0
retq
If we could instead generate the following IR:
define &lt;64 x i8&gt; @foo(i8 %a, &lt;64 x i8&gt; %passthru, i64 %mask) {
%m = bitcast i64 %mask to &lt;64 x i1&gt;
%add0 = add i8 %a, %a
%add1 = add i8 %add0, %add0
%v0 = insertelement &lt;64 x i8&gt; undef, i8 %add1, i32 0
%v1 = shufflevector &lt;64 x i8&gt; %v0, &lt;64 x i8&gt; undef, &lt;64 x i32&gt; zeroinitializer
%w0 = insertelement &lt;64 x i8&gt; undef, i8 %a, i32 0
%w1 = shufflevector &lt;64 x i8&gt; %w0, &lt;64 x i8&gt; undef, &lt;64 x i32&gt; zeroinitializer
%add2 = add &lt;64 x i8&gt; %w1, %passthru
%v2 = select &lt;64 x i1&gt; %m, &lt;64 x i8&gt; %v1, &lt;64 x i8&gt; %add2
ret &lt;64 x i8&gt; %v2
}
we'll get this assembly:
kmovq %rsi, %k1
vpbroadcastb %edi, %zmm1
shlb $2, %dil
vpaddb %zmm0, %zmm1, %zmm0
vpbroadcastb %edi, %zmm0 {%k1}
which saves one broadcast instruction (two vpbroadcastb instead of three).</pre>
</div>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>