<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Efficient check that all vector elements are zero"

   href="https://bugs.llvm.org/show_bug.cgi?id=34730">34730</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Efficient check that all vector elements are zero

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>5.0

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Backend: X86

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>llvm@henning-thielemann.de

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>I want to check that all elements of a vector are zero.

I can use this e.g. for checking that two vectors are equal, by xor-ing them

and then checking for a zero vector.

I have two implementations: The first one is generic and the second one is

tailored to a 64-bit architecture. Can I expect that the first one is optimized

to the second one? Currently, it is not.

$ cat null-vector.ll

target triple = "x86_64-pc-linux-gnu"

define i1 @test_zero0(&lt;16 x i8&gt;){

_L1:

  %1  = extractelement &lt;16 x i8&gt; %0, i32 0

  %2  = extractelement &lt;16 x i8&gt; %0, i32 1

  %3  = extractelement &lt;16 x i8&gt; %0, i32 2

  %4  = extractelement &lt;16 x i8&gt; %0, i32 3

  %5  = extractelement &lt;16 x i8&gt; %0, i32 4

  %6  = extractelement &lt;16 x i8&gt; %0, i32 5

  %7  = extractelement &lt;16 x i8&gt; %0, i32 6

  %8  = extractelement &lt;16 x i8&gt; %0, i32 7

  %9  = extractelement &lt;16 x i8&gt; %0, i32 8

  %10 = extractelement &lt;16 x i8&gt; %0, i32 9

  %11 = extractelement &lt;16 x i8&gt; %0, i32 10

  %12 = extractelement &lt;16 x i8&gt; %0, i32 11

  %13 = extractelement &lt;16 x i8&gt; %0, i32 12

  %14 = extractelement &lt;16 x i8&gt; %0, i32 13

  %15 = extractelement &lt;16 x i8&gt; %0, i32 14

  %16 = extractelement &lt;16 x i8&gt; %0, i32 15

  %17 = or i8 %1, %2

  %18 = or i8 %17, %3

  %19 = or i8 %18, %4

  %20 = or i8 %19, %5

  %21 = or i8 %20, %6

  %22 = or i8 %21, %7

  %23 = or i8 %22, %8

  %24 = or i8 %23, %9

  %25 = or i8 %24, %10

  %26 = or i8 %25, %11

  %27 = or i8 %26, %12

  %28 = or i8 %27, %13

  %29 = or i8 %28, %14

  %30 = or i8 %29, %15

  %31 = or i8 %30, %16

  %32 = icmp eq i8 %31, 0

  ret i1 %32

}

define i1 @test_zero1(&lt;16 x i8&gt;){

_L1:

  %vl = shufflevector &lt;16 x i8&gt; %0, &lt;16 x i8&gt; undef, &lt;8 x i32&gt; &lt;i32 0, i32 1,

i32 2, i32 3, i32 4, i32 5, i32 6, i32 7&gt;

  %vh = shufflevector &lt;16 x i8&gt; %0, &lt;16 x i8&gt; undef, &lt;8 x i32&gt; &lt;i32 8, i32 9,

i32 10, i32 11, i32 12, i32 13, i32 14, i32 15&gt;

  %wl = bitcast &lt;8 x i8&gt; %vl to i64

  %wh = bitcast &lt;8 x i8&gt; %vh to i64

  %w = or i64 %wl, %wh

  %c = icmp eq i64 %w, 0

  ret i1 %c

}

$ opt-5.0 -O3 &lt;null-vector.ll | llc-5.0

        .text

        .file   "&lt;stdin&gt;"

        .globl  test_zero0              # -- Begin function test_zero0

        .p2align        4, 0x90

        .type   test_zero0,@function

test_zero0:                             # @test_zero0

# BB#0:                                 # %_L1

        movaps  %xmm0, -24(%rsp)

        movb    -24(%rsp), %al

        orb     -23(%rsp), %al

        orb     -22(%rsp), %al

        orb     -21(%rsp), %al

        orb     -20(%rsp), %al

        orb     -19(%rsp), %al

        orb     -18(%rsp), %al

        orb     -17(%rsp), %al

        orb     -16(%rsp), %al

        orb     -15(%rsp), %al

        orb     -14(%rsp), %al

        orb     -13(%rsp), %al

        orb     -12(%rsp), %al

        orb     -11(%rsp), %al

        orb     -10(%rsp), %al

        orb     -9(%rsp), %al

        sete    %al

        retq

.Lfunc_end0:

        .size   test_zero0, .Lfunc_end0-test_zero0

                                        # -- End function

        .globl  test_zero1              # -- Begin function test_zero1

        .p2align        4, 0x90

        .type   test_zero1,@function

test_zero1:                             # @test_zero1

# BB#0:                                 # %_L1

        movq    %xmm0, %rax

        pshufd  $78, %xmm0, %xmm0       # xmm0 = xmm0[2,3,0,1]

        movq    %xmm0, %rcx

        orq     %rax, %rcx

        sete    %al

        retq

.Lfunc_end1:

        .size   test_zero1, .Lfunc_end1-test_zero1

                                        # -- End function

        .section        ".note.GNU-stack","",@progbits</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>