<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - vector operations fail to optimize to _mm_testz_si128 / _mm256_testz_si256"

   href="https://bugs.llvm.org/show_bug.cgi?id=37506">37506</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>vector operations fail to optimize to _mm_testz_si128 / _mm256_testz_si256

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>new-bugs

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>All

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>new bugs

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>gonzalobg88@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>I expect the pmovmskb below to be optimized to a _mm_testz_si128 (see it live:

<a href="https://godbolt.org/g/RvyWRR">https://godbolt.org/g/RvyWRR</a>).

The Rust code generating this is:

pub fn is_ascii_vector128(s: &[u8]) -> Result<(), usize> {

    use ::simd::*;

    let mut i = 0;

    let v128 = u8x16::splat(128);

    let zero = u8x16::splat(0);

    let len = s.len();

    while i + u8x16::lanes() * 2 <= len {

        let x = unsafe { u8x16::load_unaligned_unchecked(&s.get_unchecked(i..))

};

        let y = unsafe { u8x16::load_unaligned_unchecked(&s.get_unchecked(i +

u8x16::lanes()..)) };

        let x: u8x16 = x & v128;

        let y: u8x16 = y & v128;

        if !x.eq(zero).all() || !y.eq(zero).all() {

            break;

        }

        i += u8x16::lanes() * 2;

    }

    Err(i)

}

The LLVM-IR is:

declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) 

define { i64, i64 } @is_ascii_vector128([0 x i8]* %s.0, i64 %s.1) #0 {

start:

%0 = icmp ult i64 %s.1, 32

br i1 %0, label %bb5, label %bb7.preheader

bb7.preheader:                                    ; preds = %start

br label %bb7

bb4:                                              ; preds = %bb17

%1 = add i64 %4, 32

%2 = icmp ugt i64 %1, %s.1

br i1 %2, label %bb5, label %bb7

bb5:                                              ; preds = %bb4, %bb7, %bb17,

%start

%i.0.lcssa = phi i64 [ 0, %start ], [ %i.041, %bb17 ], [ %i.041, %bb7 ], [ %4,

%bb4 ]

%3 = insertvalue { i64, i64 } { i64 1, i64 undef }, i64 %i.0.lcssa, 1

ret { i64, i64 } %3

bb7:                                              ; preds = %bb7.preheader,

%bb4

%4 = phi i64 [ %1, %bb4 ], [ 32, %bb7.preheader ]

%i.041 = phi i64 [ %4, %bb4 ], [ 0, %bb7.preheader ]

%5 = getelementptr inbounds [0 x i8], [0 x i8]* %s.0, i64 0, i64 %i.041

%x.0..sroa_cast.i31 = bitcast i8* %5 to <16 x i8>*

%x.0.copyload.i32 = load <16 x i8>, <16 x i8>* %x.0..sroa_cast.i31, align 1

%x.0.copyload.i32.lobit = ashr &lt;16 x i8&gt; %x.0.copyload.i32, &lt;i8 7, i8 7, i8 7,

i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7&gt;

%x.0.copyload.i32.lobit.not = xor &lt;16 x i8&gt; %x.0.copyload.i32.lobit, &lt;i8 -1, i8

-1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8

-1, i8 -1, i8 -1, i8 -1&gt;

%6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>

%x.0.copyload.i32.lobit.not) 

%7 = icmp eq i32 %6, 65535

br i1 %7, label %bb17, label %bb5

bb17:                                             ; preds = %bb7

%8 = or i64 %i.041, 16

%9 = getelementptr inbounds [0 x i8], [0 x i8]* %s.0, i64 0, i64 %8

%x.0..sroa_cast.i = bitcast i8* %9 to <16 x i8>*

%x.0.copyload.i = load <16 x i8>, <16 x i8>* %x.0..sroa_cast.i, align 1

%x.0.copyload.i.lobit = ashr &lt;16 x i8&gt; %x.0.copyload.i, &lt;i8 7, i8 7, i8 7, i8

7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7&gt;

%x.0.copyload.i.lobit.not = xor &lt;16 x i8&gt; %x.0.copyload.i.lobit, &lt;i8 -1, i8 -1,

i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8

-1, i8 -1, i8 -1&gt;

%10 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>

%x.0.copyload.i.lobit.not) #10

%11 = icmp eq i32 %10, 65535

br i1 %11, label %bb4, label %bb5

}

attributes #0 = {  "target-features"="+sse4.1" }</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>