[llvm-bugs] [Bug 50500] New: Miscompile with SLP vectorizer + instcombine

via llvm-bugs llvm-bugs at lists.llvm.org
Wed May 26 16:46:09 PDT 2021


https://bugs.llvm.org/show_bug.cgi?id=50500

            Bug ID: 50500
           Summary: Miscompile with SLP vectorizer + instcombine
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Scalar Optimizations
          Assignee: unassignedbugs at nondot.org
          Reporter: rupprecht at google.com
                CC: a.bataev at hotmail.com, llvm-bugs at lists.llvm.org

D101191 in instcombine exposed a miscompile on an internal test, which goes
away when disabling SLP vectorizer.

The following generates the incorrect result with "clang++ -msse4.2 -O2
repro.cc", but the correct result with "clang++ -msse4.2 -O2 repro.cc
-fno-slp-vectorize":

```
#include <cstdint>
#include <cstdio>

// Needed to reproduce the issue with a small sample
template <class T>
void DoNotOptimize(const T& var) {
  asm volatile("" : "+m"(const_cast<T&>(var)));
}

struct Stats {
  int64_t x = 0;
  int64_t total = 0;
};

Stats GetStats(float a, float b) {
  float c = b ? a / b : 0.0;
  bool is_outside_a = a <= 0 || a >= 2000;
  bool is_outside_b = b <= 1 || b >= 2;
  bool is_outside_c = c <= 50 || c >= 1000;

  Stats stats;
  stats.x = is_outside_b && is_outside_c ? 1 : 0;
  stats.total = (is_outside_a || is_outside_b) ? 1 : 0;
  DoNotOptimize(stats);
  return stats;
}

int main() {
  float a = 1000.0;
  float b = 0.1;
  DoNotOptimize(a);
  DoNotOptimize(b);
  const auto stats = GetStats(a, b);
  printf("total = %ld\n", stats.total);
  return stats.total == 1 ? 0 : 1;
}
```

In this example, a is 1000 (inside the first range), b is 0.1 (outside the
range), and c is 10000 (outside the range). Therefore:
- is_outside_a == false
- is_outside_b == true
- is_outside_c == true
- is_outside_b && is_outside_c == true && true == true
- is_outside_a || is_outside_b == false || true == true

However, with SLP vectorizer enabled, stats.total is 0, when it should be 1.

The following IR was reduced from the C++ above, and can reproduce the
difference with the same invocations:

```
; ModuleID = 'repro.ll'
source_filename = "repro.cc"
target datalayout =
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.widget = type { i64, i64 }

@global = private unnamed_addr constant [13 x i8] c"total = %ld\0A\00", align 1

define { i64, i64 } @baz(float %arg, float %arg1) local_unnamed_addr #0 {
bb:
  %tmp = alloca %struct.widget, align 8
  %tmp2 = fcmp une float %arg1, 0.000000e+00
  %tmp3 = fdiv float %arg, %arg1
  %tmp4 = select i1 %tmp2, float %tmp3, float 0.000000e+00
  %tmp5 = fcmp ole float %arg, 0.000000e+00
  %tmp6 = fcmp oge float %arg, 2.000000e+03
  %tmp7 = or i1 %tmp5, %tmp6
  %tmp8 = fcmp ole float %arg1, 1.000000e+00
  %tmp9 = fcmp oge float %arg1, 2.000000e+00
  %tmp10 = or i1 %tmp8, %tmp9
  %tmp11 = fcmp ole float %tmp4, 5.000000e+01
  %tmp12 = fcmp oge float %tmp4, 1.000000e+03
  %tmp13 = or i1 %tmp11, %tmp12
  %tmp14 = select i1 %tmp10, i1 %tmp13, i1 false
  %tmp15 = zext i1 %tmp14 to i64
  %tmp16 = getelementptr inbounds %struct.widget, %struct.widget* %tmp, i64 0,
i32 0
  store i64 %tmp15, i64* %tmp16, align 8
  %tmp17 = select i1 %tmp7, i1 true, i1 %tmp10
  %tmp18 = zext i1 %tmp17 to i64
  %tmp19 = getelementptr inbounds %struct.widget, %struct.widget* %tmp, i64 0,
i32 1
  store i64 %tmp18, i64* %tmp19, align 8
  call void asm sideeffect "",
"=*m,*m,~{dirflag},~{fpsr},~{flags}"(%struct.widget* %tmp, %struct.widget*
%tmp)
  %tmp20 = load i64, i64* %tmp19, align 8
  %tmp21 = insertvalue { i64, i64 } undef, i64 %tmp20, 1
  ret { i64, i64 } %tmp21
}

define i32 @main() local_unnamed_addr {
bb:
  %tmp = alloca float, align 4
  store float 1.000000e+03, float* %tmp, align 4
  %tmp1 = load float, float* %tmp, align 4
  %tmp2 = call { i64, i64 } @baz(float %tmp1, float undef)
  %tmp3 = extractvalue { i64, i64 } %tmp2, 1
  %tmp4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8],
[13 x i8]* @global, i64 0, i64 0), i64 %tmp3)
  ret i32 undef
}

declare i32 @printf(i8*, ...) local_unnamed_addr

attributes #0 = {
"target-features"="+cx8,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
}
```

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20210526/2b22338b/attachment-0001.html>


More information about the llvm-bugs mailing list