<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - [regression] shuffle sequence in _mm256_blend_pd does not lower to vblendpd anymore"
href="https://bugs.llvm.org/show_bug.cgi?id=38194">38194</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>[regression] shuffle sequence in _mm256_blend_pd does not lower to vblendpd anymore
</td>
</tr>
<tr>
<th>Product</th>
<td>new-bugs
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>All
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>release blocker
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>new bugs
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>gonzalobg88@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>The regression was found after updating Rust to LLVM7-trunk from LLVM6.
This Rust code (<a href="https://godbolt.org/g/mrCRcd">https://godbolt.org/g/mrCRcd</a>):
#![feature(repr_simd, platform_intrinsics)]
#![allow(non_camel_case_types)]
extern "platform-intrinsic" {
pub fn simd_shuffle4&lt;T, U&gt;(x: T, y: T, idx: [u32; 4]) -> U;
}
#[repr(simd)]
pub struct __m256d(f64, f64, f64, f64);
#[inline]
#[target_feature(enable = "avx")]
unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! blend4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
simd_shuffle4(a, b, [$a, $b, $c, $d]);
};
}
macro_rules! blend3 {
($a:expr, $b:expr, $c:expr) => {
match imm8 & 0x8 {
0 => blend4!($a, $b, $c, 3),
_ => blend4!($a, $b, $c, 7),
}
};
}
macro_rules! blend2 {
($a:expr, $b:expr) => {
match imm8 & 0x4 {
0 => blend3!($a, $b, 2),
_ => blend3!($a, $b, 6),
}
};
}
macro_rules! blend1 {
($a:expr) => {
match imm8 & 0x2 {
0 => blend2!($a, 1),
_ => blend2!($a, 5),
}
};
}
match imm8 & 0x1 {
0 => blend1!(0),
_ => blend1!(4),
}
}
#[target_feature(enable = "avx")]
pub unsafe fn foo(a: __m256d, b: __m256d) -> __m256d {
_mm256_blend_pd(a, b, 9)
}
Used to lower to a vblendpd instruction, but now it lowers to:
example::foo:
vmovaps ymm0, ymmword ptr [rsi]
vblendps ymm0, ymm0, ymmword ptr [rdx], 195
vmovaps ymmword ptr [rdi], ymm0
mov rax, rdi
vzeroupper
ret
The LLVM-IR emitted without optimizations is (<a href="https://godbolt.org/g/xsWfr8">https://godbolt.org/g/xsWfr8</a>):
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define internal void @_ZN7example15_mm256_blend_pd17h932ae730d6cb84edE(<4 x
double>* noalias nocapture sret dereferenceable(32), <4 x double>* noalias
nocapture dereferenceable(32) %a, <4 x double>* noalias nocapture
dereferenceable(32) %b, i32 %imm8) unnamed_addr #0 {
%_77 = alloca i8, align 1
%_70 = alloca i8, align 1
%_67 = alloca i8, align 1
%_60 = alloca i8, align 1
%_53 = alloca i8, align 1
%_50 = alloca i8, align 1
%_47 = alloca i8, align 1
%_40 = alloca i8, align 1
%_33 = alloca i8, align 1
%_30 = alloca i8, align 1
%_23 = alloca i8, align 1
%_16 = alloca i8, align 1
%_13 = alloca i8, align 1
%_10 = alloca i8, align 1
%_7 = alloca i8, align 1
%1 = and i32 %imm8, 255
%2 = icmp ule i32 %1, -1
call void @llvm.assume(i1 %2)
%3 = trunc i32 %1 to i8
%4 = and i8 %3, 1
store i8 %4, i8* %_7, align 1
%5 = load i8, i8* %_7, align 1
%6 = icmp eq i8 %5, 0
br i1 %6, label %bb1, label %bb2
bb1: ; preds = %start
%7 = and i8 %3, 2
store i8 %7, i8* %_10, align 1
%8 = load i8, i8* %_10, align 1
%9 = icmp eq i8 %8, 0
br i1 %9, label %bb4, label %bb5
bb2: ; preds = %start
%10 = and i8 %3, 2
store i8 %10, i8* %_47, align 1
%11 = load i8, i8* %_47, align 1
%12 = icmp eq i8 %11, 0
br i1 %12, label %bb33, label %bb34
bb3: ; preds = %bb6, %bb35
ret void
bb4: ; preds = %bb1
%13 = and i8 %3, 4
store i8 %13, i8* %_13, align 1
%14 = load i8, i8* %_13, align 1
%15 = icmp eq i8 %14, 0
br i1 %15, label %bb7, label %bb8
bb5: ; preds = %bb1
%16 = and i8 %3, 4
store i8 %16, i8* %_30, align 1
%17 = load i8, i8* %_30, align 1
%18 = icmp eq i8 %17, 0
br i1 %18, label %bb20, label %bb21
bb6: ; preds = %bb9, %bb22
br label %bb3
bb7: ; preds = %bb4
%19 = and i8 %3, 8
store i8 %19, i8* %_16, align 1
%20 = load i8, i8* %_16, align 1
%21 = icmp eq i8 %20, 0
br i1 %21, label %bb10, label %bb11
bb8: ; preds = %bb4
%22 = and i8 %3, 8
store i8 %22, i8* %_23, align 1
%23 = load i8, i8* %_23, align 1
%24 = icmp eq i8 %23, 0
br i1 %24, label %bb15, label %bb16
bb9: ; preds = %bb12, %bb17
br label %bb6
bb10: ; preds = %bb7
%25 = load <4 x double>, <4 x double>* %a, align 32
%26 = load <4 x double>, <4 x double>* %b, align 32
%27 = shufflevector &lt;4 x double&gt; %25, &lt;4 x double&gt; %26, &lt;4 x i32&gt; &lt;i32 0, i32
1, i32 2, i32 3&gt;
store <4 x double> %27, <4 x double>* %0, align 32
br label %bb13
bb11: ; preds = %bb7
%28 = load <4 x double>, <4 x double>* %a, align 32
%29 = load <4 x double>, <4 x double>* %b, align 32
%30 = shufflevector &lt;4 x double&gt; %28, &lt;4 x double&gt; %29, &lt;4 x i32&gt; &lt;i32 0, i32
1, i32 2, i32 7&gt;
store <4 x double> %30, <4 x double>* %0, align 32
br label %bb14
bb12: ; preds = %bb13, %bb14
br label %bb9
bb13: ; preds = %bb10
br label %bb12
bb14: ; preds = %bb11
br label %bb12
bb15: ; preds = %bb8
%31 = load <4 x double>, <4 x double>* %a, align 32
%32 = load <4 x double>, <4 x double>* %b, align 32
%33 = shufflevector &lt;4 x double&gt; %31, &lt;4 x double&gt; %32, &lt;4 x i32&gt; &lt;i32 0, i32
1, i32 6, i32 3&gt;
store <4 x double> %33, <4 x double>* %0, align 32
br label %bb18
bb16: ; preds = %bb8
%34 = load <4 x double>, <4 x double>* %a, align 32
%35 = load <4 x double>, <4 x double>* %b, align 32
%36 = shufflevector &lt;4 x double&gt; %34, &lt;4 x double&gt; %35, &lt;4 x i32&gt; &lt;i32 0, i32
1, i32 6, i32 7&gt;
store <4 x double> %36, <4 x double>* %0, align 32
br label %bb19
bb17: ; preds = %bb18, %bb19
br label %bb9
bb18: ; preds = %bb15
br label %bb17
bb19: ; preds = %bb16
br label %bb17
bb20: ; preds = %bb5
%37 = and i8 %3, 8
store i8 %37, i8* %_33, align 1
%38 = load i8, i8* %_33, align 1
%39 = icmp eq i8 %38, 0
br i1 %39, label %bb23, label %bb24
bb21: ; preds = %bb5
%40 = and i8 %3, 8
store i8 %40, i8* %_40, align 1
%41 = load i8, i8* %_40, align 1
%42 = icmp eq i8 %41, 0
br i1 %42, label %bb28, label %bb29
bb22: ; preds = %bb25, %bb30
br label %bb6
bb23: ; preds = %bb20
%43 = load <4 x double>, <4 x double>* %a, align 32
%44 = load <4 x double>, <4 x double>* %b, align 32
%45 = shufflevector &lt;4 x double&gt; %43, &lt;4 x double&gt; %44, &lt;4 x i32&gt; &lt;i32 0, i32
5, i32 2, i32 3&gt;
store <4 x double> %45, <4 x double>* %0, align 32
br label %bb26
bb24: ; preds = %bb20
%46 = load <4 x double>, <4 x double>* %a, align 32
%47 = load <4 x double>, <4 x double>* %b, align 32
%48 = shufflevector &lt;4 x double&gt; %46, &lt;4 x double&gt; %47, &lt;4 x i32&gt; &lt;i32 0, i32
5, i32 2, i32 7&gt;
store <4 x double> %48, <4 x double>* %0, align 32
br label %bb27
bb25: ; preds = %bb26, %bb27
br label %bb22
bb26: ; preds = %bb23
br label %bb25
bb27: ; preds = %bb24
br label %bb25
bb28: ; preds = %bb21
%49 = load <4 x double>, <4 x double>* %a, align 32
%50 = load <4 x double>, <4 x double>* %b, align 32
%51 = shufflevector &lt;4 x double&gt; %49, &lt;4 x double&gt; %50, &lt;4 x i32&gt; &lt;i32 0, i32
5, i32 6, i32 3&gt;
store <4 x double> %51, <4 x double>* %0, align 32
br label %bb31
bb29: ; preds = %bb21
%52 = load <4 x double>, <4 x double>* %a, align 32
%53 = load <4 x double>, <4 x double>* %b, align 32
%54 = shufflevector &lt;4 x double&gt; %52, &lt;4 x double&gt; %53, &lt;4 x i32&gt; &lt;i32 0, i32
5, i32 6, i32 7&gt;
store <4 x double> %54, <4 x double>* %0, align 32
br label %bb32
bb30: ; preds = %bb31, %bb32
br label %bb22
bb31: ; preds = %bb28
br label %bb30
bb32: ; preds = %bb29
br label %bb30
bb33: ; preds = %bb2
%55 = and i8 %3, 4
store i8 %55, i8* %_50, align 1
%56 = load i8, i8* %_50, align 1
%57 = icmp eq i8 %56, 0
br i1 %57, label %bb36, label %bb37
bb34: ; preds = %bb2
%58 = and i8 %3, 4
store i8 %58, i8* %_67, align 1
%59 = load i8, i8* %_67, align 1
%60 = icmp eq i8 %59, 0
br i1 %60, label %bb49, label %bb50
bb35: ; preds = %bb38, %bb51
br label %bb3
bb36: ; preds = %bb33
%61 = and i8 %3, 8
store i8 %61, i8* %_53, align 1
%62 = load i8, i8* %_53, align 1
%63 = icmp eq i8 %62, 0
br i1 %63, label %bb39, label %bb40
bb37: ; preds = %bb33
%64 = and i8 %3, 8
store i8 %64, i8* %_60, align 1
%65 = load i8, i8* %_60, align 1
%66 = icmp eq i8 %65, 0
br i1 %66, label %bb44, label %bb45
bb38: ; preds = %bb41, %bb46
br label %bb35
bb39: ; preds = %bb36
%67 = load <4 x double>, <4 x double>* %a, align 32
%68 = load <4 x double>, <4 x double>* %b, align 32
%69 = shufflevector &lt;4 x double&gt; %67, &lt;4 x double&gt; %68, &lt;4 x i32&gt; &lt;i32 4, i32
1, i32 2, i32 3&gt;
store <4 x double> %69, <4 x double>* %0, align 32
br label %bb42
bb40: ; preds = %bb36
%70 = load <4 x double>, <4 x double>* %a, align 32
%71 = load <4 x double>, <4 x double>* %b, align 32
%72 = shufflevector &lt;4 x double&gt; %70, &lt;4 x double&gt; %71, &lt;4 x i32&gt; &lt;i32 4, i32
1, i32 2, i32 7&gt;
store <4 x double> %72, <4 x double>* %0, align 32
br label %bb43
bb41: ; preds = %bb42, %bb43
br label %bb38
bb42: ; preds = %bb39
br label %bb41
bb43: ; preds = %bb40
br label %bb41
bb44: ; preds = %bb37
%73 = load <4 x double>, <4 x double>* %a, align 32
%74 = load <4 x double>, <4 x double>* %b, align 32
%75 = shufflevector &lt;4 x double&gt; %73, &lt;4 x double&gt; %74, &lt;4 x i32&gt; &lt;i32 4, i32
1, i32 6, i32 3&gt;
store <4 x double> %75, <4 x double>* %0, align 32
br label %bb47
bb45: ; preds = %bb37
%76 = load <4 x double>, <4 x double>* %a, align 32
%77 = load <4 x double>, <4 x double>* %b, align 32
%78 = shufflevector &lt;4 x double&gt; %76, &lt;4 x double&gt; %77, &lt;4 x i32&gt; &lt;i32 4, i32
1, i32 6, i32 7&gt;
store <4 x double> %78, <4 x double>* %0, align 32
br label %bb48
bb46: ; preds = %bb47, %bb48
br label %bb38
bb47: ; preds = %bb44
br label %bb46
bb48: ; preds = %bb45
br label %bb46
bb49: ; preds = %bb34
%79 = and i8 %3, 8
store i8 %79, i8* %_70, align 1
%80 = load i8, i8* %_70, align 1
%81 = icmp eq i8 %80, 0
br i1 %81, label %bb52, label %bb53
bb50: ; preds = %bb34
%82 = and i8 %3, 8
store i8 %82, i8* %_77, align 1
%83 = load i8, i8* %_77, align 1
%84 = icmp eq i8 %83, 0
br i1 %84, label %bb57, label %bb58
bb51: ; preds = %bb54, %bb59
br label %bb35
bb52: ; preds = %bb49
%85 = load <4 x double>, <4 x double>* %a, align 32
%86 = load <4 x double>, <4 x double>* %b, align 32
%87 = shufflevector &lt;4 x double&gt; %85, &lt;4 x double&gt; %86, &lt;4 x i32&gt; &lt;i32 4, i32
5, i32 2, i32 3&gt;
store <4 x double> %87, <4 x double>* %0, align 32
br label %bb55
bb53: ; preds = %bb49
%88 = load <4 x double>, <4 x double>* %a, align 32
%89 = load <4 x double>, <4 x double>* %b, align 32
%90 = shufflevector &lt;4 x double&gt; %88, &lt;4 x double&gt; %89, &lt;4 x i32&gt; &lt;i32 4, i32
5, i32 2, i32 7&gt;
store <4 x double> %90, <4 x double>* %0, align 32
br label %bb56
bb54: ; preds = %bb55, %bb56
br label %bb51
bb55: ; preds = %bb52
br label %bb54
bb56: ; preds = %bb53
br label %bb54
bb57: ; preds = %bb50
%91 = load <4 x double>, <4 x double>* %a, align 32
%92 = load <4 x double>, <4 x double>* %b, align 32
%93 = shufflevector &lt;4 x double&gt; %91, &lt;4 x double&gt; %92, &lt;4 x i32&gt; &lt;i32 4, i32
5, i32 6, i32 3&gt;
store <4 x double> %93, <4 x double>* %0, align 32
br label %bb60
bb58: ; preds = %bb50
%94 = load <4 x double>, <4 x double>* %a, align 32
%95 = load <4 x double>, <4 x double>* %b, align 32
%96 = shufflevector &lt;4 x double&gt; %94, &lt;4 x double&gt; %95, &lt;4 x i32&gt; &lt;i32 4, i32
5, i32 6, i32 7&gt;
store <4 x double> %96, <4 x double>* %0, align 32
br label %bb61
bb59: ; preds = %bb60, %bb61
br label %bb51
bb60: ; preds = %bb57
br label %bb59
bb61: ; preds = %bb58
br label %bb59
}
define void @_ZN7example3foo17h81ec92146872827cE(<4 x double>* noalias
nocapture sret dereferenceable(32), <4 x double>* noalias nocapture
dereferenceable(32) %a, <4 x double>* noalias nocapture dereferenceable(32) %b)
unnamed_addr #1 {
%arg1 = alloca <4 x double>, align 32
%arg = alloca <4 x double>, align 32
%1 = load <4 x double>, <4 x double>* %a, align 32
%2 = load <4 x double>, <4 x double>* %b, align 32
store <4 x double> %1, <4 x double>* %arg, align 32
store <4 x double> %2, <4 x double>* %arg1, align 32
call void @_ZN7example15_mm256_blend_pd17h932ae730d6cb84edE(<4 x double>*
noalias nocapture sret dereferenceable(32) %0, <4 x double>* noalias nocapture
dereferenceable(32) %arg, <4 x double>* noalias nocapture dereferenceable(32)
%arg1, i32 9)
br label %bb1
bb1: ; preds = %start
ret void
}
declare void @llvm.assume(i1) #2
attributes #0 = { inlinehint nounwind "probe-stack"="__rust_probestack"
"target-features"="+avx" }
attributes #1 = { nounwind "probe-stack"="__rust_probestack"
"target-features"="+avx" }
attributes #2 = { nounwind }</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>