[llvm-bugs] [Bug 34730] New: Efficient check that all vector elements are zero
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Sep 26 01:22:28 PDT 2017
https://bugs.llvm.org/show_bug.cgi?id=34730
Bug ID: 34730
Summary: Efficient check that all vector elements are zero
Product: libraries
Version: 5.0
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: llvm at henning-thielemann.de
CC: llvm-bugs at lists.llvm.org
I want to check that all elements of a vector are zero.
I can use this e.g. for checking that two vectors are equal, by xor-ing them
and then checking for a zero vector.
I have two implementations: The first one is generic and the second one is
tailored to a 64-bit architecture. Can I expect that the first one is optimized
to the second one? Currently, it is not.
$ cat null-vector.ll
target triple = "x86_64-pc-linux-gnu"
; Generic form: returns true iff every one of the 16 bytes of the
; <16 x i8> argument (%0) is zero.
; Each lane is extracted as a scalar, the scalars are OR-ed together in a
; linear chain, and the final OR is compared against 0.
define i1 @test_zero0(<16 x i8>){
_L1:
; Pull each of the 16 lanes (indices 0..15) out as a scalar i8.
%1 = extractelement <16 x i8> %0, i32 0
%2 = extractelement <16 x i8> %0, i32 1
%3 = extractelement <16 x i8> %0, i32 2
%4 = extractelement <16 x i8> %0, i32 3
%5 = extractelement <16 x i8> %0, i32 4
%6 = extractelement <16 x i8> %0, i32 5
%7 = extractelement <16 x i8> %0, i32 6
%8 = extractelement <16 x i8> %0, i32 7
%9 = extractelement <16 x i8> %0, i32 8
%10 = extractelement <16 x i8> %0, i32 9
%11 = extractelement <16 x i8> %0, i32 10
%12 = extractelement <16 x i8> %0, i32 11
%13 = extractelement <16 x i8> %0, i32 12
%14 = extractelement <16 x i8> %0, i32 13
%15 = extractelement <16 x i8> %0, i32 14
%16 = extractelement <16 x i8> %0, i32 15
; OR-reduce the scalars one at a time; the running value stays zero only
; while every lane folded in so far is zero.
%17 = or i8 %1, %2
%18 = or i8 %17, %3
%19 = or i8 %18, %4
%20 = or i8 %19, %5
%21 = or i8 %20, %6
%22 = or i8 %21, %7
%23 = or i8 %22, %8
%24 = or i8 %23, %9
%25 = or i8 %24, %10
%26 = or i8 %25, %11
%27 = or i8 %26, %12
%28 = or i8 %27, %13
%29 = or i8 %28, %14
%30 = or i8 %29, %15
%31 = or i8 %30, %16
; The OR of all lanes is zero exactly when every lane is zero.
%32 = icmp eq i8 %31, 0
ret i1 %32
}
; Variant written for a 64-bit target: returns true iff the <16 x i8>
; argument (%0) is all zero, by treating it as two 64-bit halves instead of
; 16 separate bytes.
define i1 @test_zero1(<16 x i8>){
_L1:
; %vl = lanes 0..7 of the argument, %vh = lanes 8..15.
%vl = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> <i32 0, i32 1,
i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%vh = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> <i32 8, i32 9,
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Reinterpret each 8-byte half as a single i64 (no bits are changed).
%wl = bitcast <8 x i8> %vl to i64
%wh = bitcast <8 x i8> %vh to i64
; Both halves are zero exactly when their OR is zero.
%w = or i64 %wl, %wh
%c = icmp eq i64 %w, 0
ret i1 %c
}
$ opt-5.0 -O3 <null-vector.ll | llc-5.0
.text
.file "<stdin>"
.globl test_zero0 # -- Begin function test_zero0
.p2align 4, 0x90
.type test_zero0,@function
test_zero0: # @test_zero0
# BB#0: # %_L1
movaps %xmm0, -24(%rsp)
movb -24(%rsp), %al
orb -23(%rsp), %al
orb -22(%rsp), %al
orb -21(%rsp), %al
orb -20(%rsp), %al
orb -19(%rsp), %al
orb -18(%rsp), %al
orb -17(%rsp), %al
orb -16(%rsp), %al
orb -15(%rsp), %al
orb -14(%rsp), %al
orb -13(%rsp), %al
orb -12(%rsp), %al
orb -11(%rsp), %al
orb -10(%rsp), %al
orb -9(%rsp), %al
sete %al
retq
.Lfunc_end0:
.size test_zero0, .Lfunc_end0-test_zero0
# -- End function
.globl test_zero1 # -- Begin function test_zero1
.p2align 4, 0x90
.type test_zero1,@function
test_zero1: # @test_zero1
# BB#0: # %_L1
movq %xmm0, %rax
pshufd $78, %xmm0, %xmm0 # xmm0 = xmm0[2,3,0,1]
movq %xmm0, %rcx
orq %rax, %rcx
sete %al
retq
.Lfunc_end1:
.size test_zero1, .Lfunc_end1-test_zero1
# -- End function
.section ".note.GNU-stack","",@progbits
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170926/84a0ff24/attachment.html>
More information about the llvm-bugs
mailing list