[llvm-bugs] [Bug 37506] New: vector operations fail to optimize to _mm_testz_si128 / _mm256_testz_si256

Thu May 17 09:52:37 PDT 2018

https://bugs.llvm.org/show_bug.cgi?id=37506

            Bug ID: 37506
           Summary: vector operations fail to optimize to _mm_testz_si128
                    / _mm256_testz_si256
           Product: new-bugs
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: new bugs
          Assignee: unassignedbugs at nondot.org
          Reporter: gonzalobg88 at gmail.com
                CC: llvm-bugs at lists.llvm.org

I expect the pmovmskb + compare sequence below to be optimized to a single
ptest, i.e. what _mm_testz_si128 generates (see it live:
https://godbolt.org/g/RvyWRR).

The Rust code generating this is:

/// Scans `s` two `u8x16` vectors at a time and stops at the first step in
/// which any byte has its high bit (0x80) set, i.e. is not ASCII.
///
/// NOTE(review): every path through this function ends in `Err(i)`; `Ok(())`
/// is never returned. `i` is the offset of the first two-vector step that was
/// not accepted by the loop (either because it contained a byte with the high
/// bit set, or because fewer than `2 * u8x16::lanes()` bytes remained). Bytes
/// in a tail shorter than two vectors are not examined here.
pub fn is_ascii_vector128(s: &[u8]) -> Result<(), usize> {
    use ::simd::*;
    let mut i = 0;
    // 0x80 in every lane: the mask for the high bit of each byte.
    let v128 = u8x16::splat(128);
    let zero = u8x16::splat(0);
    let len = s.len();
    // Only iterate while two full vectors starting at `i` fit inside `s`.
    while i + u8x16::lanes() * 2 <= len {
        // SAFETY: the loop condition guarantees `i + 2 * lanes() <= len`, so
        // both `i..` and `i + lanes()..` are in-bounds ranges of `s`.
        // NOTE(review): presumably `load_unaligned_unchecked` reads exactly
        // `lanes()` bytes from the start of the given slice -- confirm against
        // the `simd` crate documentation.
        let x = unsafe { u8x16::load_unaligned_unchecked(&s.get_unchecked(i..))
};
        let y = unsafe { u8x16::load_unaligned_unchecked(&s.get_unchecked(i +
u8x16::lanes()..)) };
        // Keep only the high bit of every lane.
        let x: u8x16 = x & v128;
        let y: u8x16 = y & v128;
        // Stop as soon as either masked vector is not entirely zero
        // (presumably `eq` is lane-wise and `all` reduces the lane mask).
        if !x.eq(zero).all() || !y.eq(zero).all() {
            break;
        }
        i += u8x16::lanes() * 2;
    }
    Err(i)
}

The LLVM-IR is:

declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) 

; Optimized IR for the Rust function above. %s.0 / %s.1 are the slice pointer
; and length. The returned pair always has 1 in field 0 (presumably the `Err`
; discriminant) and the loop index at exit in field 1 (see %3 in bb5).
define { i64, i64 } @is_ascii_vector128([0 x i8]* %s.0, i64 %s.1) #0 {
start:
; Fewer than 32 bytes: skip the loop and return index 0.
%0 = icmp ult i64 %s.1, 32
br i1 %0, label %bb5, label %bb7.preheader

bb7.preheader:                                    ; preds = %start
br label %bb7

; Loop latch: %4 is the index of the next 32-byte step; leave the loop when
; %4 + 32 would exceed the length.
bb4:                                              ; preds = %bb17
%1 = add i64 %4, 32
%2 = icmp ugt i64 %1, %s.1
br i1 %2, label %bb5, label %bb7

; Common exit: select the index that was current when the loop was left.
bb5:                                              ; preds = %bb4, %bb7, %bb17,
%start
%i.0.lcssa = phi i64 [ 0, %start ], [ %i.041, %bb17 ], [ %i.041, %bb7 ], [ %4,
%bb4 ]
%3 = insertvalue { i64, i64 } { i64 1, i64 undef }, i64 %i.0.lcssa, 1
ret { i64, i64 } %3

; First 16 bytes of the step. `ashr 7` turns each byte into 0x00/0xFF according
; to its sign bit, `xor -1` inverts that, and pmovmskb collects the top bit of
; each lane; a result of 65535 means no input byte had its high bit set.
; This ashr/xor/pmovmskb/icmp sequence is what the report wants folded.
bb7:                                              ; preds = %bb7.preheader,
%bb4
%4 = phi i64 [ %1, %bb4 ], [ 32, %bb7.preheader ]
%i.041 = phi i64 [ %4, %bb4 ], [ 0, %bb7.preheader ]
%5 = getelementptr inbounds [0 x i8], [0 x i8]* %s.0, i64 0, i64 %i.041
%x.0..sroa_cast.i31 = bitcast i8* %5 to <16 x i8>*
%x.0.copyload.i32 = load <16 x i8>, <16 x i8>* %x.0..sroa_cast.i31, align 1
%x.0.copyload.i32.lobit = ashr <16 x i8> %x.0.copyload.i32, <i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%x.0.copyload.i32.lobit.not = xor <16 x i8> %x.0.copyload.i32.lobit, <i8 -1, i8
-1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8
-1, i8 -1, i8 -1, i8 -1>
%6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>
%x.0.copyload.i32.lobit.not) 
%7 = icmp eq i32 %6, 65535
br i1 %7, label %bb17, label %bb5

; Second 16 bytes of the step, at index %i.041 | 16; same check as in bb7.
bb17:                                             ; preds = %bb7
%8 = or i64 %i.041, 16
%9 = getelementptr inbounds [0 x i8], [0 x i8]* %s.0, i64 0, i64 %8
%x.0..sroa_cast.i = bitcast i8* %9 to <16 x i8>*
%x.0.copyload.i = load <16 x i8>, <16 x i8>* %x.0..sroa_cast.i, align 1
%x.0.copyload.i.lobit = ashr <16 x i8> %x.0.copyload.i, <i8 7, i8 7, i8 7, i8
7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%x.0.copyload.i.lobit.not = xor <16 x i8> %x.0.copyload.i.lobit, <i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8
-1, i8 -1, i8 -1>
%10 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>
%x.0.copyload.i.lobit.not) #10
%11 = icmp eq i32 %10, 65535
br i1 %11, label %bb4, label %bb5
}

attributes #0 = {  "target-features"="+sse4.1" }

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180517/bdc673ae/attachment.html>