[llvm-bugs] [Bug 43745] New: [SLP] Failure to create v2f64 comparison reductions
via llvm-bugs
llvm-bugs at lists.llvm.org
Mon Oct 21 06:50:37 PDT 2019
https://bugs.llvm.org/show_bug.cgi?id=43745
Bug ID: 43745
Summary: [SLP] Failure to create v2f64 comparison reductions
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: Scalar Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: llvm-dev at redking.me.uk
CC: a.bataev at hotmail.com, andrea.dibiagio at gmail.com,
craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
spatel+llvm at rotateright.com
Current Codegen: https://godbolt.org/z/n0UB_k
Pulled out of the c-ray benchmark:
#define ERR_MARGIN 1e-6
bool cmp_lt_gt(double a, double b, double c) {
double t1 = (-b + c) / (2.0 * a);
double t2 = (-b - c) / (2.0 * a);
if((t1 < ERR_MARGIN && t2 < ERR_MARGIN) || (t1 > 1.0 && t2 > 1.0))
return 0;
return 1;
}
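SLP fails to create an AND reduction for either pair of comparisons (the
"< ERR_MARGIN" pair or the "> 1.0" pair), let alone merge them into a single
(tweaked) reduction (and branch). Oddly, it does vectorize one of the pairs
(the "> 1.0" test, emitted below as a <2 x double> fcmp ule) but then fails to
form a reduction for the result: both lanes are extracted and combined with a
scalar 'or' instead.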
clang -g0 -O3 -march=btver2 -emit-llvm
define i1 @_Z9cmp_lt_gtddd(double %0, double %1, double %2) {
%4 = fneg double %1
%5 = fmul double %0, 2.000000e+00
%6 = insertelement <2 x double> undef, double %4, i32 0
%7 = insertelement <2 x double> %6, double %2, i32 1
%8 = insertelement <2 x double> undef, double %2, i32 0
%9 = insertelement <2 x double> %8, double %1, i32 1
%10 = fsub <2 x double> %7, %9
%11 = insertelement <2 x double> undef, double %5, i32 0
%12 = shufflevector <2 x double> %11, <2 x double> undef, <2 x i32>
zeroinitializer
%13 = fdiv <2 x double> %10, %12
%14 = extractelement <2 x double> %13, i32 1
%15 = fcmp olt double %14, 0x3EB0C6F7A0B5ED8D
%16 = extractelement <2 x double> %13, i32 0
%17 = fcmp olt double %16, 0x3EB0C6F7A0B5ED8D
%18 = and i1 %15, %17
br i1 %18, label %24, label %19
19: ; preds = %3
%20 = fcmp ule <2 x double> %13, <double 1.000000e+00, double 1.000000e+00>
%21 = extractelement <2 x i1> %20, i32 0
%22 = extractelement <2 x i1> %20, i32 1
%23 = or i1 %21, %22
ret i1 %23
24: ; preds = %3
ret i1 false
}
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20191021/a72f35be/attachment.html>
More information about the llvm-bugs
mailing list