[llvm-bugs] [Bug 37506] New: vector operations fail to optimize to _mm_testz_si128 / _mm256_testz_si256
via llvm-bugs
llvm-bugs at lists.llvm.org
Thu May 17 09:52:37 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=37506
Bug ID: 37506
Summary: vector operations fail to optimize to _mm_testz_si128
/ _mm256_testz_si256
Product: new-bugs
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: gonzalobg88 at gmail.com
CC: llvm-bugs at lists.llvm.org
I expect the pmovmskb below to be optimized to a _mm_testz_si128 (see it live:
https://godbolt.org/g/RvyWRR).
The Rust code generating this is:
/// Scans `s` in chunks of two `u8x16` vectors, stopping at the first chunk
/// that contains a byte with its high bit (value 128) set.
///
/// As written, this reproduction always returns `Err(i)`: `i` is the start
/// offset of the chunk on which the loop stopped, or, if no chunk matched, the
/// offset just past the last full two-vector chunk. It never returns `Ok(())`,
/// and bytes after the last full chunk are not examined.
pub fn is_ascii_vector128(s: &[u8]) -> Result<(), usize> {
use ::simd::*;
let mut i = 0;
// Mask selecting the high bit of every lane, and the all-zero comparison value.
let v128 = u8x16::splat(128);
let zero = u8x16::splat(0);
let len = s.len();
// Only iterate while two whole vectors still fit in the slice.
while i + u8x16::lanes() * 2 <= len {
// SAFETY (as relied on here): the loop guard gives `i + 2 * lanes() <= len`.
// Assumes each load reads exactly `u8x16::lanes()` bytes — TODO confirm
// against the `simd` crate's documentation.
let x = unsafe { u8x16::load_unaligned_unchecked(&s.get_unchecked(i..))
};
let y = unsafe { u8x16::load_unaligned_unchecked(&s.get_unchecked(i +
u8x16::lanes()..)) };
// Keep only the high bit of each lane.
let x: u8x16 = x & v128;
let y: u8x16 = y & v128;
// Presumably `eq` is a lane-wise comparison and `all` is true only when
// every lane matched; stop as soon as either vector has a non-zero lane.
if !x.eq(zero).all() || !y.eq(zero).all() {
break;
}
i += u8x16::lanes() * 2;
}
// NOTE(review): unconditional `Err`, even when the whole loop ran clean.
Err(i)
}
The LLVM-IR is:
; SSE2 PMOVMSKB intrinsic: takes a <16 x i8> and returns an i32 mask
; (per the x86 ISA, one bit per byte, taken from each byte's top bit).
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
; Optimized IR for the Rust function of the same name. %s.0 is the slice
; pointer and %s.1 its length; the result is a two-field { i64, i64 } aggregate.
define { i64, i64 } @is_ascii_vector128([0 x i8]* %s.0, i64 %s.1) #0 {
start:
; Fewer than 32 bytes: skip the loop entirely and return with index 0.
%0 = icmp ult i64 %s.1, 32
br i1 %0, label %bb5, label %bb7.preheader
bb7.preheader: ; preds = %start
br label %bb7
; Loop latch: %4 is the next index (current + 32). Continue only if another
; full 32-byte chunk fits, i.e. %4 + 32 <= %s.1.
bb4: ; preds = %bb17
%1 = add i64 %4, 32
%2 = icmp ugt i64 %1, %s.1
br i1 %2, label %bb5, label %bb7
; Common exit: select the index at which the loop stopped and return it in
; field 1. Field 0 is the constant 1 (presumably the `Err` discriminant,
; matching the Rust source's unconditional `Err(i)`).
bb5: ; preds = %bb4, %bb7, %bb17,
%start
%i.0.lcssa = phi i64 [ 0, %start ], [ %i.041, %bb17 ], [ %i.041, %bb7 ], [ %4,
%bb4 ]
%3 = insertvalue { i64, i64 } { i64 1, i64 undef }, i64 %i.0.lcssa, 1
ret { i64, i64 } %3
; Loop header: %i.041 is the current index, %4 is that index plus 32.
bb7: ; preds = %bb7.preheader,
%bb4
%4 = phi i64 [ %1, %bb4 ], [ 32, %bb7.preheader ]
%i.041 = phi i64 [ %4, %bb4 ], [ 0, %bb7.preheader ]
; Unaligned (align 1) 16-byte load of the first vector at offset %i.041.
%5 = getelementptr inbounds [0 x i8], [0 x i8]* %s.0, i64 0, i64 %i.041
%x.0..sroa_cast.i31 = bitcast i8* %5 to <16 x i8>*
%x.0.copyload.i32 = load <16 x i8>, <16 x i8>* %x.0..sroa_cast.i31, align 1
; Arithmetic shift right by 7 smears each byte's sign bit across the lane
; (0x00 or 0xFF); the xor with -1 then inverts every lane.
%x.0.copyload.i32.lobit = ashr <16 x i8> %x.0.copyload.i32, <i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%x.0.copyload.i32.lobit.not = xor <16 x i8> %x.0.copyload.i32.lobit, <i8 -1, i8
-1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8
-1, i8 -1, i8 -1, i8 -1>
; Collect the mask and require all 16 bits set (65535 == 0xFFFF), i.e. no
; input byte had its high bit set. This is the sequence the report wants
; folded into a single PTEST (_mm_testz_si128).
%6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>
%x.0.copyload.i32.lobit.not)
%7 = icmp eq i32 %6, 65535
br i1 %7, label %bb17, label %bb5
; Same check for the second vector of the chunk. %i.041 only takes values
; 0, 32, 64, ... here, so `or ..., 16` is equivalent to adding 16.
bb17: ; preds = %bb7
%8 = or i64 %i.041, 16
%9 = getelementptr inbounds [0 x i8], [0 x i8]* %s.0, i64 0, i64 %8
%x.0..sroa_cast.i = bitcast i8* %9 to <16 x i8>*
%x.0.copyload.i = load <16 x i8>, <16 x i8>* %x.0..sroa_cast.i, align 1
%x.0.copyload.i.lobit = ashr <16 x i8> %x.0.copyload.i, <i8 7, i8 7, i8 7, i8
7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
%x.0.copyload.i.lobit.not = xor <16 x i8> %x.0.copyload.i.lobit, <i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8
-1, i8 -1, i8 -1>
; NOTE(review): attribute group #10 is referenced here but is not defined
; anywhere in this excerpt.
%10 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>
%x.0.copyload.i.lobit.not) #10
%11 = icmp eq i32 %10, 65535
br i1 %11, label %bb4, label %bb5
}
; Attribute group #0 sets the "target-features" string to "+sse4.1" for the
; function above.
attributes #0 = { "target-features"="+sse4.1" }
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180517/bdc673ae/attachment.html>
More information about the llvm-bugs
mailing list