<!doctype html>
<html lang="en">
<head>
  <!-- Encoding declaration comes first so it is seen before any other content. -->
  <meta charset="utf-8">
  <title>Bug 34730: Efficient check that all vector elements are zero</title>
  <!-- Relative URLs in this mail resolve against the bug tracker. -->
  <base href="https://bugs.llvm.org/">
</head>
<body>
<!-- Bug metadata: one row per field; the field name is the row header.
     The presentational attributes are kept because this mail ships no stylesheet. -->
<table border="1" cellspacing="0" cellpadding="8">
  <tr>
    <th scope="row">Bug ID</th>
    <td><a class="bz_bug_link bz_status_NEW" href="https://bugs.llvm.org/show_bug.cgi?id=34730" title="NEW - Efficient check that all vector elements are zero">34730</a></td>
  </tr>
  <tr>
    <th scope="row">Summary</th>
    <td>Efficient check that all vector elements are zero</td>
  </tr>
  <tr>
    <th scope="row">Product</th>
    <td>libraries</td>
  </tr>
  <tr>
    <th scope="row">Version</th>
    <td>5.0</td>
  </tr>
  <tr>
    <th scope="row">Hardware</th>
    <td>PC</td>
  </tr>
  <tr>
    <th scope="row">OS</th>
    <td>Linux</td>
  </tr>
  <tr>
    <th scope="row">Status</th>
    <td>NEW</td>
  </tr>
  <tr>
    <th scope="row">Severity</th>
    <td>enhancement</td>
  </tr>
  <tr>
    <th scope="row">Priority</th>
    <td>P</td>
  </tr>
  <tr>
    <th scope="row">Component</th>
    <td>Backend: X86</td>
  </tr>
  <tr>
    <th scope="row">Assignee</th>
    <td>unassignedbugs@nondot.org</td>
  </tr>
  <tr>
    <th scope="row">Reporter</th>
    <td>llvm@henning-thielemann.de</td>
  </tr>
  <tr>
    <th scope="row">CC</th>
    <td>llvm-bugs@lists.llvm.org</td>
  </tr>
</table>
<!-- Bug description exactly as submitted. Every literal "<" and ">" in the
     listing is entity-escaped so the browser shows it instead of parsing it
     as a tag. -->
<div>
<pre>I want to check that all elements of a vector are zero.
I can use this e.g. for checking that two vectors are equal, by xor-ing them
and then check for a zero vector.
I have two implementations: The first one is generic and the second one is
tailored to a 64-bit architecture. Can I expect that the first one is optimized
to the second one? Currently, it is not.
$ cat null-vector.ll
target triple = "x86_64-pc-linux-gnu"
define i1 @test_zero0(&lt;16 x i8&gt;){
_L1:
%1 = extractelement &lt;16 x i8&gt; %0, i32 0
%2 = extractelement &lt;16 x i8&gt; %0, i32 1
%3 = extractelement &lt;16 x i8&gt; %0, i32 2
%4 = extractelement &lt;16 x i8&gt; %0, i32 3
%5 = extractelement &lt;16 x i8&gt; %0, i32 4
%6 = extractelement &lt;16 x i8&gt; %0, i32 5
%7 = extractelement &lt;16 x i8&gt; %0, i32 6
%8 = extractelement &lt;16 x i8&gt; %0, i32 7
%9 = extractelement &lt;16 x i8&gt; %0, i32 8
%10 = extractelement &lt;16 x i8&gt; %0, i32 9
%11 = extractelement &lt;16 x i8&gt; %0, i32 10
%12 = extractelement &lt;16 x i8&gt; %0, i32 11
%13 = extractelement &lt;16 x i8&gt; %0, i32 12
%14 = extractelement &lt;16 x i8&gt; %0, i32 13
%15 = extractelement &lt;16 x i8&gt; %0, i32 14
%16 = extractelement &lt;16 x i8&gt; %0, i32 15
%17 = or i8 %1, %2
%18 = or i8 %17, %3
%19 = or i8 %18, %4
%20 = or i8 %19, %5
%21 = or i8 %20, %6
%22 = or i8 %21, %7
%23 = or i8 %22, %8
%24 = or i8 %23, %9
%25 = or i8 %24, %10
%26 = or i8 %25, %11
%27 = or i8 %26, %12
%28 = or i8 %27, %13
%29 = or i8 %28, %14
%30 = or i8 %29, %15
%31 = or i8 %30, %16
%32 = icmp eq i8 %31, 0
ret i1 %32
}
define i1 @test_zero1(&lt;16 x i8&gt;){
_L1:
%vl = shufflevector &lt;16 x i8&gt; %0, &lt;16 x i8&gt; undef, &lt;8 x i32&gt; &lt;i32 0, i32 1,
i32 2, i32 3, i32 4, i32 5, i32 6, i32 7&gt;
%vh = shufflevector &lt;16 x i8&gt; %0, &lt;16 x i8&gt; undef, &lt;8 x i32&gt; &lt;i32 8, i32 9,
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15&gt;
%wl = bitcast &lt;8 x i8&gt; %vl to i64
%wh = bitcast &lt;8 x i8&gt; %vh to i64
%w = or i64 %wl, %wh
%c = icmp eq i64 %w, 0
ret i1 %c
}
$ opt-5.0 -O3 &lt;null-vector.ll | llc-5.0
.text
.file "&lt;stdin&gt;"
.globl test_zero0 # -- Begin function test_zero0
.p2align 4, 0x90
.type test_zero0,@function
test_zero0: # @test_zero0
# BB#0: # %_L1
movaps %xmm0, -24(%rsp)
movb -24(%rsp), %al
orb -23(%rsp), %al
orb -22(%rsp), %al
orb -21(%rsp), %al
orb -20(%rsp), %al
orb -19(%rsp), %al
orb -18(%rsp), %al
orb -17(%rsp), %al
orb -16(%rsp), %al
orb -15(%rsp), %al
orb -14(%rsp), %al
orb -13(%rsp), %al
orb -12(%rsp), %al
orb -11(%rsp), %al
orb -10(%rsp), %al
orb -9(%rsp), %al
sete %al
retq
.Lfunc_end0:
.size test_zero0, .Lfunc_end0-test_zero0
# -- End function
.globl test_zero1 # -- Begin function test_zero1
.p2align 4, 0x90
.type test_zero1,@function
test_zero1: # @test_zero1
# BB#0: # %_L1
movq %xmm0, %rax
pshufd $78, %xmm0, %xmm0 # xmm0 = xmm0[2,3,0,1]
movq %xmm0, %rcx
orq %rax, %rcx
sete %al
retq
.Lfunc_end1:
.size test_zero1, .Lfunc_end1-test_zero1
# -- End function
.section ".note.GNU-stack","",@progbits</pre>
</div>
<!-- Notification footer: why the recipient received this mail. -->
<hr>
<p>You are receiving this mail because:</p>
<ul>
  <li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>