<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Miscompile with SLP vectorizer + instcombine"
href="https://bugs.llvm.org/show_bug.cgi?id=50500">50500</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Miscompile with SLP vectorizer + instcombine
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Scalar Optimizations
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>rupprecht@google.com
</td>
</tr>
<tr>
<th>CC</th>
<td>a.bataev@hotmail.com, llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>D101191 in instcombine exposed a miscompile on an internal test, which goes
away when the SLP vectorizer is disabled.
The following generates the incorrect result with "clang++ -msse4.2 -O2
repro.cc", but the correct result with "clang++ -msse4.2 -O2 repro.cc
-fno-slp-vectorize":
```
#include <cstdint>
#include <cstdio>
// Needed to reproduce the issue with a small sample
//
// Optimization barrier: executes an empty inline-asm statement that names var
// as a read-write memory operand.
// var: object whose storage is handed to the asm statement. It is accepted by
//      const reference and the const qualifier is cast away inside.
template <class T>
void DoNotOptimize(const T& var) {
// The asm template is empty. The "+m" constraint declares var as memory that
// the statement both reads and writes, and volatile keeps the statement from
// being deleted. The const_cast is there because a read-write operand must be
// a modifiable lvalue.
asm volatile("" : "+m"(const_cast<T&>(var)));
}
// Pair of 64-bit results filled in by GetStats; both fields default to 0.
struct Stats {
// Set by GetStats to 1 when b and c are both outside their ranges, else 0.
int64_t x = 0;
// Set by GetStats to 1 when a or b is outside its range, else 0.
// This is the field main() prints and checks.
int64_t total = 0;
};
// Classifies a, b and the quotient c = a / b against three fixed ranges and
// returns the combined flags.
// a, b: input values.
// Returns a Stats with
//   x     = 1 when b and c are both outside their ranges, else 0
//   total = 1 when a or b is outside its range, else 0
Stats GetStats(float a, float b) {
// c is a / b when b converts to true (is not zero); otherwise it is 0.0.
float c = b ? a / b : 0.0;
// Each flag is true when the value lies at or beyond either bound, i.e. it is
// not strictly inside the open interval (0, 2000), (1, 2) or (50, 1000).
bool is_outside_a = a <= 0 || a >= 2000;
bool is_outside_b = b <= 1 || b >= 2;
bool is_outside_c = c <= 50 || c >= 1000;
Stats stats;
stats.x = is_outside_b && is_outside_c ? 1 : 0;
stats.total = (is_outside_a || is_outside_b) ? 1 : 0;
// Pass the result through the inline-asm barrier before returning it.
DoNotOptimize(stats);
return stats;
}
// Driver for the reproducer: calls GetStats(1000.0, 0.1), prints stats.total,
// and exits with status 0 when total is 1 and status 1 otherwise.
int main() {
float a = 1000.0;
float b = 0.1;
// Route both inputs through the inline-asm barrier before they are used.
// NOTE(review): presumably this keeps the call from being constant-folded;
// the report only states the barrier is needed for a small reproducer.
DoNotOptimize(a);
DoNotOptimize(b);
const auto stats = GetStats(a, b);
printf("total = %ld\n", stats.total);
// The report expects total == 1 for these inputs, so a correct build exits 0.
return stats.total == 1 ? 0 : 1;
}
```
In this example, a is 1000 (inside its range, 0 to 2000), b is 0.1 (outside its
range, 1 to 2), and c = a / b is 10000 (outside its range, 50 to 1000). Therefore:
- is_outside_a == false
- is_outside_b == true
- is_outside_c == true
- is_outside_b && is_outside_c == true && true == true
- is_outside_a || is_outside_b == false || true == true
However, with the SLP vectorizer enabled, stats.total is 0 when it should be 1.
The following IR was reduced from the C++ above, and can reproduce the
difference with the same invocations:
```
; ModuleID = 'repro.ll'
; Module-level setup for the reduced reproducer: source name, data layout and
; target triple for x86_64 Linux.
source_filename = "repro.cc"
target datalayout =
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Two-field struct of 64-bit integers used as the stack temporary in @baz.
%struct.widget = type { i64, i64 }
; NUL-terminated printf format string "total = %ld" followed by a newline.
@global = private unnamed_addr constant [13 x i8] c"total = %ld\0A\00", align 1
; Computes two range-check flags from %arg and %arg1, stores them into a stack
; struct, passes that struct through an inline-asm statement, then returns the
; reloaded second field in element 1 of the result. Element 0 of the returned
; aggregate is left undef.
define { i64, i64 } @baz(float %arg, float %arg1) local_unnamed_addr #0 {
bb:
; Stack slot for the two-field struct.
%tmp = alloca %struct.widget, align 8
; %tmp4 = %arg / %arg1 when %arg1 compares une (unordered or not equal) to
; 0.0, otherwise 0.0.
%tmp2 = fcmp une float %arg1, 0.000000e+00
%tmp3 = fdiv float %arg, %arg1
%tmp4 = select i1 %tmp2, float %tmp3, float 0.000000e+00
; %tmp7: %arg is at most 0.0 or at least 2000.0 (ordered compares).
%tmp5 = fcmp ole float %arg, 0.000000e+00
%tmp6 = fcmp oge float %arg, 2.000000e+03
%tmp7 = or i1 %tmp5, %tmp6
; %tmp10: %arg1 is at most 1.0 or at least 2.0.
%tmp8 = fcmp ole float %arg1, 1.000000e+00
%tmp9 = fcmp oge float %arg1, 2.000000e+00
%tmp10 = or i1 %tmp8, %tmp9
; %tmp13: the quotient %tmp4 is at most 50.0 or at least 1000.0.
%tmp11 = fcmp ole float %tmp4, 5.000000e+01
%tmp12 = fcmp oge float %tmp4, 1.000000e+03
%tmp13 = or i1 %tmp11, %tmp12
; Logical AND in select form: yields %tmp13 when %tmp10 is true, else false.
; The result is widened to i64 and stored to field 0.
%tmp14 = select i1 %tmp10, i1 %tmp13, i1 false
%tmp15 = zext i1 %tmp14 to i64
%tmp16 = getelementptr inbounds %struct.widget, %struct.widget* %tmp, i64 0,
i32 0
store i64 %tmp15, i64* %tmp16, align 8
; Logical OR in select form: yields true when %tmp7 is true, else %tmp10.
; The result is widened to i64 and stored to field 1.
%tmp17 = select i1 %tmp7, i1 true, i1 %tmp10
%tmp18 = zext i1 %tmp17 to i64
%tmp19 = getelementptr inbounds %struct.widget, %struct.widget* %tmp, i64 0,
i32 1
store i64 %tmp18, i64* %tmp19, align 8
; Empty side-effecting inline asm that receives the struct pointer as both an
; indirect memory output and an indirect memory input.
call void asm sideeffect "",
"=*m,*m,~{dirflag},~{fpsr},~{flags}"(%struct.widget* %tmp, %struct.widget*
%tmp)
; Reload field 1 after the asm statement and return it in element 1.
%tmp20 = load i64, i64* %tmp19, align 8
%tmp21 = insertvalue { i64, i64 } undef, i64 %tmp20, 1
ret { i64, i64 } %tmp21
}
; Driver for the reduced reproducer: calls @baz with 1000.0 as the first
; argument and undef as the second, then prints element 1 of the result with
; the "total = %ld" format string. The return value is undef.
define i32 @main() local_unnamed_addr {
bb:
; Round-trip the constant 1000.0 through a stack slot before passing it on.
%tmp = alloca float, align 4
store float 1.000000e+03, float* %tmp, align 4
%tmp1 = load float, float* %tmp, align 4
; The second argument is undef in this reduced form.
%tmp2 = call { i64, i64 } @baz(float %tmp1, float undef)
%tmp3 = extractvalue { i64, i64 } %tmp2, 1
%tmp4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8],
[13 x i8]* @global, i64 0, i64 0), i64 %tmp3)
ret i32 undef
}
; External variadic printf, called from @main.
declare i32 @printf(i8*, ...) local_unnamed_addr
; Attribute group applied to @baz: the enabled x86 target features, up to and
; including SSE4.2.
attributes #0 = {
"target-features"="+cx8,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
}
```</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>