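<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Bug 37506 - vector operations fail to optimize to _mm_testz_si128 / _mm256_testz_si256</title>
<base href="https://bugs.llvm.org/">
</head>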
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - vector operations fail to optimize to _mm_testz_si128 / _mm256_testz_si256"
href="https://bugs.llvm.org/show_bug.cgi?id=37506">37506</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>vector operations fail to optimize to _mm_testz_si128 / _mm256_testz_si256
</td>
</tr>
<tr>
<th>Product</th>
<td>new-bugs
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>new bugs
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>gonzalobg88@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>I expect the pmovmskb below to be optimized to a _mm_testz_si128 (see it live:
<a href="https://godbolt.org/g/RvyWRR">https://godbolt.org/g/RvyWRR</a>).
The Rust code generating this is:
pub fn is_ascii_vector128(s: &amp;[u8]) -&gt; Result&lt;(), usize&gt; {
use ::simd::*;
let mut i = 0;
let v128 = u8x16::splat(128);
let zero = u8x16::splat(0);
let len = s.len();
while i + u8x16::lanes() * 2 &lt;= len {
let x = unsafe { u8x16::load_unaligned_unchecked(&amp;s.get_unchecked(i..))
};
let y = unsafe { u8x16::load_unaligned_unchecked(&amp;s.get_unchecked(i +
u8x16::lanes()..)) };
let x: u8x16 = x & v128;
let y: u8x16 = y & v128;
if !x.eq(zero).all() || !y.eq(zero).all() {
break;
}
i += u8x16::lanes() * 2;
}
Err(i)
}
The LLVM-IR is:
declare i32 @llvm.x86.sse2.pmovmskb.128(&lt;16 x i8&gt;)
define { i64, i64 } @is_ascii_vector128([0 x i8]* %s.0, i64 %s.1) #0 {
start:
%0 = icmp ult i64 %s.1, 32
br i1 %0, label %bb5, label %bb7.preheader
bb7.preheader: ; preds = %start
br label %bb7
bb4: ; preds = %bb17
%1 = add i64 %4, 32
%2 = icmp ugt i64 %1, %s.1
br i1 %2, label %bb5, label %bb7
bb5: ; preds = %bb4, %bb7, %bb17,
%start
%i.0.lcssa = phi i64 [ 0, %start ], [ %i.041, %bb17 ], [ %i.041, %bb7 ], [ %4,
%bb4 ]
%3 = insertvalue { i64, i64 } { i64 1, i64 undef }, i64 %i.0.lcssa, 1
ret { i64, i64 } %3
bb7: ; preds = %bb7.preheader,
%bb4
%4 = phi i64 [ %1, %bb4 ], [ 32, %bb7.preheader ]
%i.041 = phi i64 [ %4, %bb4 ], [ 0, %bb7.preheader ]
%5 = getelementptr inbounds [0 x i8], [0 x i8]* %s.0, i64 0, i64 %i.041
%x.0..sroa_cast.i31 = bitcast i8* %5 to &lt;16 x i8&gt;*
%x.0.copyload.i32 = load &lt;16 x i8&gt;, &lt;16 x i8&gt;* %x.0..sroa_cast.i31, align 1
%x.0.copyload.i32.lobit = ashr &lt;16 x i8&gt; %x.0.copyload.i32, &lt;i8 7, i8 7, i8 7,
i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7&gt;
%x.0.copyload.i32.lobit.not = xor &lt;16 x i8&gt; %x.0.copyload.i32.lobit, &lt;i8 -1, i8
-1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8
-1, i8 -1, i8 -1, i8 -1&gt;
%6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(&lt;16 x i8&gt;
%x.0.copyload.i32.lobit.not)
%7 = icmp eq i32 %6, 65535
br i1 %7, label %bb17, label %bb5
bb17: ; preds = %bb7
%8 = or i64 %i.041, 16
%9 = getelementptr inbounds [0 x i8], [0 x i8]* %s.0, i64 0, i64 %8
%x.0..sroa_cast.i = bitcast i8* %9 to &lt;16 x i8&gt;*
%x.0.copyload.i = load &lt;16 x i8&gt;, &lt;16 x i8&gt;* %x.0..sroa_cast.i, align 1
%x.0.copyload.i.lobit = ashr &lt;16 x i8&gt; %x.0.copyload.i, &lt;i8 7, i8 7, i8 7, i8
7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7&gt;
%x.0.copyload.i.lobit.not = xor &lt;16 x i8&gt; %x.0.copyload.i.lobit, &lt;i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8
-1, i8 -1, i8 -1&gt;
%10 = tail call i32 @llvm.x86.sse2.pmovmskb.128(&lt;16 x i8&gt;
%x.0.copyload.i.lobit.not) #10
%11 = icmp eq i32 %10, 65535
br i1 %11, label %bb4, label %bb5
}
attributes #0 = { "target-features"="+sse4.1" }</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>